[clang] [AArch64][SME] Warn when using a streaming builtin from a non-streaming function (PR #74064)
Sam Tebbs via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 1 08:17:13 PST 2023
https://github.com/SamTebbs33 updated https://github.com/llvm/llvm-project/pull/74064
>From f6a990a000b555d7f8ef0b2a99e3fea98420e899 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs <samuel.tebbs at arm.com>
Date: Thu, 30 Nov 2023 13:42:50 +0000
Subject: [PATCH 1/2] [AArch64][SME] Warn when using a streaming builtin from a
non-streaming function
This PR adds a warning that's emitted when a non-streaming or
non-streaming-compatible builtin is called in an unsuitable function.
Uses work by Kerry McLaughlin.
---
clang/include/clang/Basic/CMakeLists.txt | 6 +
.../clang/Basic/DiagnosticSemaKinds.td | 3 +
clang/include/clang/Sema/Sema.h | 3 +
clang/lib/Sema/SemaChecking.cpp | 191 ++++++++++++++++++
.../Sema/aarch64-incompat-sm-builtin-calls.c | 21 ++
clang/utils/TableGen/SveEmitter.cpp | 68 +++++++
clang/utils/TableGen/TableGen.cpp | 9 +
clang/utils/TableGen/TableGenBackends.h | 1 +
8 files changed, 302 insertions(+)
diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt
index 085e316fcc671df..bdd72d1d63c431b 100644
--- a/clang/include/clang/Basic/CMakeLists.txt
+++ b/clang/include/clang/Basic/CMakeLists.txt
@@ -97,6 +97,12 @@ clang_tablegen(arm_sme_builtin_cg.inc -gen-arm-sme-builtin-codegen
clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks
SOURCE arm_sme.td
TARGET ClangARMSmeSemaRangeChecks)
+clang_tablegen(arm_sme_streaming_attrs.inc -gen-arm-sme-streaming-attrs
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeStreamingAttrs)
+clang_tablegen(arm_sme_builtins_za_state.inc -gen-arm-sme-builtin-za-state
+ SOURCE arm_sme.td
+ TARGET ClangARMSmeBuiltinsZAState)
clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def
SOURCE arm_cde.td
TARGET ClangARMCdeBuiltinsDef)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 6dfb2d7195203a3..c7036fc881a13e1 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3151,6 +3151,9 @@ def err_attribute_arm_feature_sve_bits_unsupported : Error<
def warn_attribute_arm_sm_incompat_builtin : Warning<
"builtin call has undefined behaviour when called from a %0 function">,
InGroup<DiagGroup<"undefined-arm-streaming">>;
+def warn_attribute_arm_za_builtin_no_za_state : Warning<
+ "builtin call is not valid when calling from a function without active ZA state">,
+ InGroup<DiagGroup<"undefined-arm-za">>;
def err_sve_vector_in_non_sve_target : Error<
"SVE vector type %0 cannot be used in a target without sve">;
def err_attribute_riscv_rvv_bits_unsupported : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 6de1a098e067a38..c13c6942c219700 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -13845,7 +13845,10 @@ class Sema final {
bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
CallExpr *TheCall);
bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool ParseSVEImmChecks(CallExpr *TheCall,
+ SmallVector<std::tuple<int, int, int>, 3> &ImmChecks);
bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
+ bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall);
bool CheckCDEBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
CallExpr *TheCall);
bool CheckARMCoprocessorImmediate(const TargetInfo &TI, const Expr *CoprocArg,
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 77c8334f3ca25d3..f27eb8ad95cc703 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2995,6 +2995,134 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
enum ArmStreamingType { ArmNonStreaming, ArmStreaming, ArmStreamingCompatible };
+bool Sema::ParseSVEImmChecks(
+ CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
+ // Perform all the immediate checks for this builtin call.
+ bool HasError = false;
+ for (auto &I : ImmChecks) {
+ int ArgNum, CheckTy, ElementSizeInBits;
+ std::tie(ArgNum, CheckTy, ElementSizeInBits) = I;
+
+ typedef bool (*OptionSetCheckFnTy)(int64_t Value);
+
+ // Function that checks whether the operand (ArgNum) is an immediate
+ // that is one of the predefined values.
+ auto CheckImmediateInSet = [&](OptionSetCheckFnTy CheckImm,
+ int ErrDiag) -> bool {
+ // We can't check the value of a dependent argument.
+ Expr *Arg = TheCall->getArg(ArgNum);
+ if (Arg->isTypeDependent() || Arg->isValueDependent())
+ return false;
+
+ // Check constant-ness first.
+ llvm::APSInt Imm;
+ if (SemaBuiltinConstantArg(TheCall, ArgNum, Imm))
+ return true;
+
+ if (!CheckImm(Imm.getSExtValue()))
+ return Diag(TheCall->getBeginLoc(), ErrDiag) << Arg->getSourceRange();
+ return false;
+ };
+
+ switch ((SVETypeFlags::ImmCheckType)CheckTy) {
+ case SVETypeFlags::ImmCheck0_31:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 31))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_13:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 13))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck1_16:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, 16))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_7:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 7))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckExtract:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ (2048 / ElementSizeInBits) - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckShiftRight:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1, ElementSizeInBits))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckShiftRightNarrow:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 1,
+ ElementSizeInBits / 2))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckShiftLeft:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ ElementSizeInBits - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckLaneIndex:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ (128 / (1 * ElementSizeInBits)) - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckLaneIndexCompRotate:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ (128 / (2 * ElementSizeInBits)) - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckLaneIndexDot:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0,
+ (128 / (4 * ElementSizeInBits)) - 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckComplexRot90_270:
+ if (CheckImmediateInSet([](int64_t V) { return V == 90 || V == 270; },
+ diag::err_rotation_argument_to_cadd))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheckComplexRotAll90:
+ if (CheckImmediateInSet(
+ [](int64_t V) {
+ return V == 0 || V == 90 || V == 180 || V == 270;
+ },
+ diag::err_rotation_argument_to_cmla))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_1:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 1))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_2:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 2))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_3:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_0:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_15:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck0_255:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255))
+ HasError = true;
+ break;
+ case SVETypeFlags::ImmCheck2_4_Mul2:
+ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 2, 4) ||
+ SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2))
+ HasError = true;
+ break;
+ }
+ }
+
+ return HasError;
+}
+
static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) {
if (FD->hasAttr<ArmLocallyStreamingAttr>())
return ArmStreaming;
@@ -3023,6 +3151,66 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
<< TheCall->getSourceRange() << "streaming compatible";
return;
}
+
+ if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) {
+ S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
+ << TheCall->getSourceRange() << "non-streaming";
+ }
+}
+
+static bool hasSMEZAState(const FunctionDecl *FD) {
+ if (FD->hasAttr<ArmNewZAAttr>())
+ return true;
+ if (const auto *T = FD->getType()->getAs<FunctionProtoType>())
+ if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask)
+ return true;
+ return false;
+}
+
+static bool hasSMEZAState(unsigned BuiltinID) {
+ switch (BuiltinID) {
+ default:
+ return false;
+#define GET_SME_BUILTIN_HAS_ZA_STATE
+#include "clang/Basic/arm_sme_builtins_za_state.inc"
+#undef GET_SME_BUILTIN_HAS_ZA_STATE
+ }
+}
+
+bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
+ if (const FunctionDecl *FD = getCurFunctionDecl()) {
+ bool debug = FD->getDeclName().getAsString() == "incompat_sve_sm";
+ std::optional<ArmStreamingType> BuiltinType;
+
+ switch (BuiltinID) {
+ default:
+ break;
+#define GET_SME_STREAMING_ATTRS
+#include "clang/Basic/arm_sme_streaming_attrs.inc"
+#undef GET_SME_STREAMING_ATTRS
+ }
+
+ if (BuiltinType)
+ checkArmStreamingBuiltin(*this, TheCall, FD, *BuiltinType);
+
+ if (hasSMEZAState(BuiltinID) && !hasSMEZAState(FD))
+ Diag(TheCall->getBeginLoc(),
+ diag::warn_attribute_arm_za_builtin_no_za_state)
+ << TheCall->getSourceRange();
+ }
+
+ // Range check SME intrinsics that take immediate values.
+ SmallVector<std::tuple<int, int, int>, 3> ImmChecks;
+
+ switch (BuiltinID) {
+ default:
+ return false;
+#define GET_SME_IMMEDIATE_CHECK
+#include "clang/Basic/arm_sme_sema_rangechecks.inc"
+#undef GET_SME_IMMEDIATE_CHECK
+ }
+
+ return ParseSVEImmChecks(TheCall, ImmChecks);
}
bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
@@ -3559,6 +3747,9 @@ bool Sema::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
if (CheckSVEBuiltinFunctionCall(BuiltinID, TheCall))
return true;
+ if (CheckSMEBuiltinFunctionCall(BuiltinID, TheCall))
+ return true;
+
// For intrinsics which take an immediate value as part of the instruction,
// range check them here.
unsigned i = 0, l = 0, u = 0;
diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
index e77e09c4435188d..8f33075c7a9b51f 100644
--- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
+++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
@@ -5,6 +5,7 @@
// REQUIRES: aarch64-registered-target
#include "arm_neon.h"
+#include "arm_sme_draft_spec_subject_to_change.h"
int16x8_t incompat_neon_sm(int16x8_t splat) __arm_streaming {
// expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming function}}
@@ -20,3 +21,23 @@ int16x8_t incompat_neon_smc(int16x8_t splat) __arm_streaming_compatible {
// expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming compatible function}}
return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33);
}
+
+void incompat_sme_norm(svbool_t pg, void const *ptr) __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a non-streaming function}}
+ return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);
+}
+
+void incompat_sme_smc(svbool_t pg, void const *ptr) __arm_streaming_compatible __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a streaming compatible function}}
+ return __builtin_sme_svld1_hor_za128(0, 0, pg, ptr);
+}
+
+void incompat_sme_sm(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_shared_za {
+ // expected-warning at +1 {{builtin call has undefined behaviour when called from a non-streaming function}}
+ svmops_za32_f32_m(0, pn, pm, zn, zm);
+}
+
+svbool_t streaming_caller_ptrue(void) __arm_streaming {
+ // expected-no-warning
+ return svand_z(svptrue_b16(), svptrue_pat_b16(SV_ALL), svptrue_pat_b16(SV_VL4));
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index b380bd9dfe6643a..cb02ef09e130eb3 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -378,6 +378,9 @@ class SVEEmitter {
/// Emit all the information needed to map builtin -> LLVM IR intrinsic.
void createSMECodeGenMap(raw_ostream &o);
+ /// Create a table for a builtin's requirement for PSTATE.SM.
+ void createStreamingAttrs(raw_ostream &o, ACLEKind Kind);
+
/// Emit all the range checks for the immediates.
void createSMERangeChecks(raw_ostream &o);
@@ -1369,6 +1372,12 @@ void SVEEmitter::createHeader(raw_ostream &OS) {
OS << "#define __aio static __inline__ __attribute__((__always_inline__, "
"__nodebug__, __overloadable__))\n\n";
+ OS << "#ifdef __ARM_FEATURE_SME\n";
+ OS << "#define __asc __attribute__((arm_streaming_compatible))\n";
+ OS << "#else\n";
+ OS << "#define __asc\n";
+ OS << "#endif\n\n";
+
// Add reinterpret functions.
for (auto [N, Suffix] :
std::initializer_list<std::pair<unsigned, const char *>>{
@@ -1688,6 +1697,61 @@ void SVEEmitter::createSMERangeChecks(raw_ostream &OS) {
OS << "#endif\n\n";
}
+void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
+ std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
+ SmallVector<std::unique_ptr<Intrinsic>, 128> Defs;
+ for (auto *R : RV)
+ createIntrinsic(R, Defs);
+
+ // The mappings must be sorted based on BuiltinID.
+ llvm::sort(Defs, [](const std::unique_ptr<Intrinsic> &A,
+ const std::unique_ptr<Intrinsic> &B) {
+ return A->getMangledName() < B->getMangledName();
+ });
+
+ switch (Kind) {
+ case ACLEKind::SME:
+ OS << "#ifdef GET_SME_STREAMING_ATTRS\n";
+ break;
+ case ACLEKind::SVE:
+ OS << "#ifdef GET_SVE_STREAMING_ATTRS\n";
+ break;
+ }
+
+ // Ensure these are only emitted once.
+ std::set<std::string> Emitted;
+
+ uint64_t IsStreamingFlag = getEnumValueForFlag("IsStreaming");
+ uint64_t IsStreamingCompatibleFlag =
+ getEnumValueForFlag("IsStreamingCompatible");
+ for (auto &Def : Defs) {
+ if (Emitted.find(Def->getMangledName()) != Emitted.end())
+ continue;
+
+ switch (Kind) {
+ case ACLEKind::SME:
+ OS << "case SME::BI__builtin_sme_";
+ break;
+ case ACLEKind::SVE:
+ OS << "case SVE::BI__builtin_sve_";
+ break;
+ }
+ OS << Def->getMangledName() << ":\n";
+
+ if (Def->isFlagSet(IsStreamingFlag))
+ OS << " BuiltinType = ArmStreaming;\n";
+ else if (Def->isFlagSet(IsStreamingCompatibleFlag))
+ OS << " BuiltinType = ArmStreamingCompatible;\n";
+ else
+ OS << " BuiltinType = ArmNonStreaming;\n";
+ OS << " break;\n";
+
+ Emitted.insert(Def->getMangledName());
+ }
+
+ OS << "#endif\n\n";
+}
+
namespace clang {
void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createHeader(OS);
@@ -1724,4 +1788,8 @@ void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) {
void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) {
SVEEmitter(Records).createSMERangeChecks(OS);
}
+
+void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) {
+ SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME);
+}
} // End namespace clang
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
index 7efb6c731d3e5ee..9ba2fb07f1380da 100644
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -89,6 +89,8 @@ enum ActionType {
GenArmSmeBuiltins,
GenArmSmeBuiltinCG,
GenArmSmeRangeChecks,
+ GenArmSmeStreamingAttrs,
+ GenArmSmeBuiltinZAState,
GenArmCdeHeader,
GenArmCdeBuiltinDef,
GenArmCdeBuiltinSema,
@@ -251,6 +253,10 @@ cl::opt<ActionType> Action(
"Generate arm_sme_builtin_cg_map.inc for clang"),
clEnumValN(GenArmSmeRangeChecks, "gen-arm-sme-sema-rangechecks",
"Generate arm_sme_sema_rangechecks.inc for clang"),
+ clEnumValN(GenArmSmeStreamingAttrs, "gen-arm-sme-streaming-attrs",
+ "Generate arm_sme_streaming_attrs.inc for clang"),
+ clEnumValN(GenArmSmeBuiltinZAState, "gen-arm-sme-builtin-za-state",
+ "Generate arm_sme_builtins_za_state.inc for clang"),
clEnumValN(GenArmMveHeader, "gen-arm-mve-header",
"Generate arm_mve.h for clang"),
clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def",
@@ -500,6 +506,9 @@ bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenArmSmeRangeChecks:
EmitSmeRangeChecks(Records, OS);
break;
+ case GenArmSmeStreamingAttrs:
+ EmitSmeStreamingAttrs(Records, OS);
+ break;
case GenArmCdeHeader:
EmitCdeHeader(Records, OS);
break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
index d8f447069376bca..2f1c96bfa59640c 100644
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -109,6 +109,7 @@ void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
+void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
>From f8ff85b65e1bc1e30a4a1071f814b2e296b18307 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <samuel.tebbs at arm.com>
Date: Fri, 1 Dec 2023 16:08:21 +0000
Subject: [PATCH 2/2] fixup! adjust tests
---
.../aarch64-sme-intrinsics/acle_sme_add-i32.c | 16 +-
.../aarch64-sme-intrinsics/acle_sme_add-i64.c | 16 +-
.../aarch64-sme-intrinsics/acle_sme_ld1.c | 32 ++-
.../acle_sme_ld1_vnum.c | 32 ++-
.../acle_sme_mopa-za32.c | 14 +-
.../acle_sme_mopa-za64.c | 10 +-
.../acle_sme_mops-za32.c | 14 +-
.../acle_sme_mops-za64.c | 10 +-
.../aarch64-sme-intrinsics/acle_sme_read.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_st1.c | 32 ++-
.../acle_sme_st1_vnum.c | 32 ++-
.../aarch64-sme-intrinsics/acle_sme_write.c | 192 +++++++++---------
.../aarch64-sme-intrinsics/acle_sme_target.c | 10 +-
13 files changed, 289 insertions(+), 313 deletions(-)
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
index e444321be41b2cb..ee6c1c9dd566b5a 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za32, _s32, _m)(3, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
+void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _u32, _m)(3, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 0, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) {
+void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za32, _s32, _m)(3, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
index 4b2f71d607eb60c..254ea89d22c501a 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -50,7 +50,7 @@ void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -70,7 +70,7 @@ void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -90,7 +90,7 @@ void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddha_za64, _s64, _m)(7, pn, pm, zn);
}
@@ -110,7 +110,7 @@ void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(0, pn, pm, zn);
}
@@ -130,7 +130,7 @@ void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
+void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _u64, _m)(7, pn, pm, zn);
}
@@ -150,7 +150,7 @@ void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 0, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(0, pn, pm, zn);
}
@@ -170,7 +170,7 @@ void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) {
+void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svaddva_za64, _s64, _m)(7, pn, pm, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
index 72d159b19118e3a..5622568c4cd76b6 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1.c
@@ -1,17 +1,11 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
#include <arm_sme_draft_spec_subject_to_change.h>
-#ifdef DISABLE_SME_ATTRIBUTES
-#define ARM_STREAMING_ATTR
-#else
-#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
-#endif
-
// CHECK-C-LABEL: define dso_local void @test_svld1_hor_za8(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: entry:
@@ -28,7 +22,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_hor_za8(0, slice_base, pg, ptr);
svld1_hor_za8(0, slice_base + 15, pg, ptr);
}
@@ -51,7 +45,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, con
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_hor_za16(0, slice_base, pg, ptr);
svld1_hor_za16(1, slice_base + 7, pg, ptr);
}
@@ -74,7 +68,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, co
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_hor_za32(0, slice_base, pg, ptr);
svld1_hor_za32(3, slice_base + 3, pg, ptr);
}
@@ -97,7 +91,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, co
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_hor_za64(0, slice_base, pg, ptr);
svld1_hor_za64(7, slice_base + 1, pg, ptr);
}
@@ -118,7 +112,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, co
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_hor_za128(0, slice_base, pg, ptr);
svld1_hor_za128(15, slice_base, pg, ptr);
}
@@ -139,7 +133,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, c
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_ver_za8(0, slice_base, pg, ptr);
svld1_ver_za8(0, slice_base + 15, pg, ptr);
}
@@ -162,7 +156,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, con
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_ver_za16(0, slice_base, pg, ptr);
svld1_ver_za16(1, slice_base + 7, pg, ptr);
}
@@ -185,7 +179,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, co
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_ver_za32(0, slice_base, pg, ptr);
svld1_ver_za32(3, slice_base + 3, pg, ptr);
}
@@ -208,7 +202,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, co
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_ver_za64(0, slice_base, pg, ptr);
svld1_ver_za64(7, slice_base + 1, pg, ptr);
}
@@ -229,7 +223,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, co
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) {
+void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const void *ptr) __arm_streaming {
svld1_ver_za128(0, slice_base, pg, ptr);
svld1_ver_za128(15, slice_base, pg, ptr);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
index cf49f62664eee8c..0fe7dcfc0a79946 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_ld1_vnum.c
@@ -1,17 +1,11 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
#include <arm_sme_draft_spec_subject_to_change.h>
-#ifdef DISABLE_SME_ATTRIBUTES
-#define ARM_STREAMING_ATTR
-#else
-#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
-#endif
-
// CHECK-C-LABEL: define dso_local void @test_svld1_hor_vnum_za8(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: entry:
@@ -34,7 +28,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -63,7 +57,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -92,7 +86,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -121,7 +115,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -148,7 +142,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
svld1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
}
@@ -175,7 +169,7 @@ ARM_STREAMING_ATTR void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -204,7 +198,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_hor_za8(uint32_t slice_base, svbool_t pg,
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -233,7 +227,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -262,7 +256,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -289,7 +283,7 @@ ARM_STREAMING_ATTR void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) {
+void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const void *ptr, int64_t vnum) __arm_streaming {
svld1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
svld1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
index 88206eec76b97e6..b90c9be4a6e094d 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
+void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
+void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) {
+void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) {
+void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) {
+void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) {
+void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumopa_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) {
+void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmopa_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
index 1a1ab1e00d0ec62..a56ce4d17f1261f 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) {
+void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
SME_ACLE_FUNC(svmopa_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) {
+void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumopa_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) {
+void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmopa_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
index 1633cc6ed32e059..abcf4c2e698d7fa 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
+void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -42,7 +42,7 @@ void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
+void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
@@ -62,7 +62,7 @@ void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x bfloat> [[ZN]], <vscale x 8 x bfloat> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) {
+void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _bf16, _m)(0, pn, pm, zn, zm);
}
@@ -82,7 +82,7 @@ void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x half> [[ZN]], <vscale x 8 x half> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) {
+void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _f16, _m)(1, pn, pm, zn, zm);
}
@@ -102,7 +102,7 @@ void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]], <vscale x 4 x float> [[ZN]], <vscale x 4 x float> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) {
+void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za32, _f32, _m)(1, pn, pm, zn, zm);
}
@@ -118,7 +118,7 @@ void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) {
+void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumops_za32, _s8, _m)(0, pn, pm, zn, zm);
}
@@ -134,7 +134,7 @@ void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 0, <vscale x 16 x i1> [[PN]], <vscale x 16 x i1> [[PM]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) {
+void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmops_za32, _u8, _m)(0, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
index b17df30c0f8513b..b26b9e4e51e0571 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c
@@ -30,7 +30,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) {
+void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _s16, _m)(7, pn, pm, zn, zm);
}
@@ -50,7 +50,7 @@ void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) {
+void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _u16, _m)(0, pn, pm, zn, zm);
}
@@ -70,7 +70,7 @@ void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[ZN]], <vscale x 2 x double> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) {
+void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) __arm_streaming {
SME_ACLE_FUNC(svmops_za64, _f64, _m)(7, pn, pm, zn, zm);
}
@@ -90,7 +90,7 @@ void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 0, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) {
+void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svsumops_za64, _s16, _m)(0, pn, pm, zn, zm);
}
@@ -110,7 +110,7 @@ void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]], <vscale x 8 x i16> [[ZN]], <vscale x 8 x i16> [[ZM]])
// CHECK-CXX-NEXT: ret void
//
-void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) {
+void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) __arm_streaming {
SME_ACLE_FUNC(svusmops_za64, _u16, _m)(7, pn, pm, zn, zm);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
index d63900129f9946e..a15599d186a87a8 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -44,7 +44,7 @@ svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -63,7 +63,7 @@ svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -83,7 +83,7 @@ svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -102,7 +102,7 @@ svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -122,7 +122,7 @@ svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -141,7 +141,7 @@ svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -161,7 +161,7 @@ svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -178,7 +178,7 @@ svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -196,7 +196,7 @@ svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -215,7 +215,7 @@ svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -235,7 +235,7 @@ svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -254,7 +254,7 @@ svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -274,7 +274,7 @@ svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -293,7 +293,7 @@ svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -313,7 +313,7 @@ svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -332,7 +332,7 @@ svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -352,7 +352,7 @@ svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -371,7 +371,7 @@ svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -391,7 +391,7 @@ svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -410,7 +410,7 @@ svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -430,7 +430,7 @@ svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -449,7 +449,7 @@ svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -469,7 +469,7 @@ svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -486,7 +486,7 @@ svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -502,7 +502,7 @@ svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -520,7 +520,7 @@ svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -538,7 +538,7 @@ svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -556,7 +556,7 @@ svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -574,7 +574,7 @@ svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -592,7 +592,7 @@ svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -610,7 +610,7 @@ svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -626,7 +626,7 @@ svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -642,7 +642,7 @@ svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.horiz.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -660,7 +660,7 @@ svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -678,7 +678,7 @@ svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.horiz.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -696,7 +696,7 @@ svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -714,7 +714,7 @@ svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.horiz.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -732,7 +732,7 @@ svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -750,7 +750,7 @@ svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.horiz.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -768,7 +768,7 @@ svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -786,7 +786,7 @@ svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.horiz.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -804,7 +804,7 @@ svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -822,7 +822,7 @@ svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.horiz.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -840,7 +840,7 @@ svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -858,7 +858,7 @@ svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.horiz.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -876,7 +876,7 @@ svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -894,7 +894,7 @@ svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.horiz.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base);
}
@@ -910,7 +910,7 @@ svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -928,7 +928,7 @@ svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice);
}
@@ -947,7 +947,7 @@ svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -967,7 +967,7 @@ svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice);
}
@@ -986,7 +986,7 @@ svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1006,7 +1006,7 @@ svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice);
}
@@ -1025,7 +1025,7 @@ svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1045,7 +1045,7 @@ svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice);
}
@@ -1062,7 +1062,7 @@ svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1080,7 +1080,7 @@ svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.read.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 15;
return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice);
}
@@ -1099,7 +1099,7 @@ svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1119,7 +1119,7 @@ svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.read.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice);
}
@@ -1138,7 +1138,7 @@ svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1158,7 +1158,7 @@ svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.read.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice);
}
@@ -1177,7 +1177,7 @@ svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1197,7 +1197,7 @@ svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.read.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice);
}
@@ -1216,7 +1216,7 @@ svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1236,7 +1236,7 @@ svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.read.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice);
}
@@ -1255,7 +1255,7 @@ svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1275,7 +1275,7 @@ svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.read.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 7;
return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice);
}
@@ -1294,7 +1294,7 @@ svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1314,7 +1314,7 @@ svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.read.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 3;
return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice);
}
@@ -1333,7 +1333,7 @@ svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1353,7 +1353,7 @@ svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.read.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
uint32_t slice = slice_base + 1;
return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice);
}
@@ -1370,7 +1370,7 @@ svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t sli
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base);
}
@@ -1386,7 +1386,7 @@ svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base)
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) {
+svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base);
}
@@ -1404,7 +1404,7 @@ svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base);
}
@@ -1422,7 +1422,7 @@ svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) {
+svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base);
}
@@ -1440,7 +1440,7 @@ svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base);
}
@@ -1458,7 +1458,7 @@ svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) {
+svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base);
}
@@ -1476,7 +1476,7 @@ svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base);
}
@@ -1494,7 +1494,7 @@ svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_ba
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) {
+svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base);
}
@@ -1510,7 +1510,7 @@ svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base);
}
@@ -1526,7 +1526,7 @@ svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_bas
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.readq.vert.nxv16i8(<vscale x 16 x i8> [[ZD]], <vscale x 16 x i1> [[PG]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) {
+svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base);
}
@@ -1544,7 +1544,7 @@ svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_b
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base);
}
@@ -1562,7 +1562,7 @@ svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.readq.vert.nxv8i16(<vscale x 8 x i16> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x i16> [[TMP1]]
//
-svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) {
+svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base);
}
@@ -1580,7 +1580,7 @@ svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base);
}
@@ -1598,7 +1598,7 @@ svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.readq.vert.nxv4i32(<vscale x 4 x i32> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x i32> [[TMP1]]
//
-svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) {
+svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base);
}
@@ -1616,7 +1616,7 @@ svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base);
}
@@ -1634,7 +1634,7 @@ svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sme.readq.vert.nxv2i64(<vscale x 2 x i64> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x i64> [[TMP1]]
//
-svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) {
+svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base);
}
@@ -1652,7 +1652,7 @@ svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base);
}
@@ -1670,7 +1670,7 @@ svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.readq.vert.nxv8f16(<vscale x 8 x half> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x half> [[TMP1]]
//
-svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base);
}
@@ -1688,7 +1688,7 @@ svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base);
}
@@ -1706,7 +1706,7 @@ svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t s
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.readq.vert.nxv8bf16(<vscale x 8 x bfloat> [[ZD]], <vscale x 8 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 8 x bfloat> [[TMP1]]
//
-svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) {
+svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base);
}
@@ -1724,7 +1724,7 @@ svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base);
}
@@ -1742,7 +1742,7 @@ svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.readq.vert.nxv4f32(<vscale x 4 x float> [[ZD]], <vscale x 4 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 4 x float> [[TMP1]]
//
-svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base);
}
@@ -1760,7 +1760,7 @@ svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t sl
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 0, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base);
}
@@ -1778,7 +1778,7 @@ svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slic
// CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sme.readq.vert.nxv2f64(<vscale x 2 x double> [[ZD]], <vscale x 2 x i1> [[TMP0]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret <vscale x 2 x double> [[TMP1]]
//
-svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) {
+svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
index 9ea5fa10c1bbc7b..9ed158aedf7e3b3 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1.c
@@ -1,17 +1,11 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
#include <arm_sme_draft_spec_subject_to_change.h>
-#ifdef DISABLE_SME_ATTRIBUTES
-#define ARM_STREAMING_ATTR
-#else
-#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
-#endif
-
// CHECK-C-LABEL: define dso_local void @test_svst1_hor_za8(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: entry:
@@ -28,7 +22,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_hor_za8(0, slice_base, pg, ptr);
svst1_hor_za8(0, slice_base + 15, pg, ptr);
}
@@ -51,7 +45,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, voi
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_hor_za16(0, slice_base, pg, ptr);
svst1_hor_za16(1, slice_base + 7, pg, ptr);
}
@@ -74,7 +68,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, vo
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_hor_za32(0, slice_base, pg, ptr);
svst1_hor_za32(3, slice_base + 3, pg, ptr);
}
@@ -97,7 +91,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, vo
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_hor_za64(0, slice_base, pg, ptr);
svst1_hor_za64(7, slice_base + 1, pg, ptr);
}
@@ -118,7 +112,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, vo
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_hor_za128(0, slice_base, pg, ptr);
svst1_hor_za128(15, slice_base, pg, ptr);
}
@@ -139,7 +133,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, v
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[PTR]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za8(0, slice_base, pg, ptr);
svst1_ver_za8(0, slice_base + 15, pg, ptr);
}
@@ -162,7 +156,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, voi
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[PTR]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za16(0, slice_base, pg, ptr);
svst1_ver_za16(1, slice_base + 7, pg, ptr);
}
@@ -185,7 +179,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, vo
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[PTR]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za32(0, slice_base, pg, ptr);
svst1_ver_za32(3, slice_base + 3, pg, ptr);
}
@@ -208,7 +202,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, vo
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[PTR]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za64(0, slice_base, pg, ptr);
svst1_ver_za64(7, slice_base + 1, pg, ptr);
}
@@ -229,7 +223,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, vo
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[PTR]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) {
+void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za128(0, slice_base, pg, ptr);
svst1_ver_za128(15, slice_base, pg, ptr);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
index 6b534f25a1f8db8..627098d9365bd43 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_st1_vnum.c
@@ -1,17 +1,11 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3
// REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
-// RUN: %clang_cc1 -DDISABLE_SME_ATTRIBUTES -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s
#include <arm_sme_draft_spec_subject_to_change.h>
-#ifdef DISABLE_SME_ATTRIBUTES
-#define ARM_STREAMING_ATTR
-#else
-#define ARM_STREAMING_ATTR __attribute__((arm_streaming))
-#endif
-
// CHECK-C-LABEL: define dso_local void @test_svst1_hor_vnum_za8(
// CHECK-C-SAME: i32 noundef [[SLICE_BASE:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr noundef [[PTR:%.*]], i64 noundef [[VNUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-C-NEXT: entry:
@@ -34,7 +28,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_hor_vnum_za8(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -63,7 +57,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_hor_vnum_za16(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -92,7 +86,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_hor_vnum_za32(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -121,7 +115,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_hor_vnum_za64(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -148,7 +142,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_hor_vnum_za128(0, slice_base, pg, ptr, vnum);
svst1_hor_vnum_za128(15, slice_base, pg, ptr, vnum);
}
@@ -175,7 +169,7 @@ ARM_STREAMING_ATTR void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1b.vert(<vscale x 16 x i1> [[PG]], ptr [[TMP1]], i32 0, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_ver_vnum_za8(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za8(0, slice_base + 15, pg, ptr, vnum);
}
@@ -204,7 +198,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1h.vert(<vscale x 8 x i1> [[TMP0]], ptr [[TMP2]], i32 1, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_ver_vnum_za16(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za16(1, slice_base + 7, pg, ptr, vnum);
}
@@ -233,7 +227,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1w.vert(<vscale x 4 x i1> [[TMP0]], ptr [[TMP2]], i32 3, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_ver_vnum_za32(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za32(3, slice_base + 3, pg, ptr, vnum);
}
@@ -262,7 +256,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1d.vert(<vscale x 2 x i1> [[TMP0]], ptr [[TMP2]], i32 7, i32 [[ADD]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_ver_vnum_za64(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za64(7, slice_base + 1, pg, ptr, vnum);
}
@@ -289,7 +283,7 @@ ARM_STREAMING_ATTR void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t p
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.st1q.vert(<vscale x 1 x i1> [[TMP0]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE]])
// CHECK-CXX-NEXT: ret void
//
-ARM_STREAMING_ATTR void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) {
+void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, void *ptr, int64_t vnum) __arm_streaming {
svst1_ver_vnum_za128(0, slice_base, pg, ptr, vnum);
svst1_ver_vnum_za128(15, slice_base, pg, ptr, vnum);
}
diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
index 3f11aa8c5eb2e7c..2cc338add314b0d 100644
--- a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
+++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c
@@ -26,7 +26,7 @@
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -44,7 +44,7 @@ void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -63,7 +63,7 @@ void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -83,7 +83,7 @@ void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -102,7 +102,7 @@ void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -122,7 +122,7 @@ void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -141,7 +141,7 @@ void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -161,7 +161,7 @@ void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -178,7 +178,7 @@ void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -196,7 +196,7 @@ void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -215,7 +215,7 @@ void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -235,7 +235,7 @@ void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -254,7 +254,7 @@ void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -274,7 +274,7 @@ void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -293,7 +293,7 @@ void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -313,7 +313,7 @@ void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -332,7 +332,7 @@ void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -352,7 +352,7 @@ void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -371,7 +371,7 @@ void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -391,7 +391,7 @@ void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -410,7 +410,7 @@ void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -430,7 +430,7 @@ void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -449,7 +449,7 @@ void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -469,7 +469,7 @@ void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -486,7 +486,7 @@ void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -502,7 +502,7 @@ void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -520,7 +520,7 @@ void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -538,7 +538,7 @@ void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -556,7 +556,7 @@ void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -574,7 +574,7 @@ void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -592,7 +592,7 @@ void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -610,7 +610,7 @@ void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -626,7 +626,7 @@ void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -642,7 +642,7 @@ void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -660,7 +660,7 @@ void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -678,7 +678,7 @@ void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -696,7 +696,7 @@ void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -714,7 +714,7 @@ void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -732,7 +732,7 @@ void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -750,7 +750,7 @@ void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -768,7 +768,7 @@ void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -786,7 +786,7 @@ void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -804,7 +804,7 @@ void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -822,7 +822,7 @@ void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -840,7 +840,7 @@ void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -858,7 +858,7 @@ void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -876,7 +876,7 @@ void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -894,7 +894,7 @@ void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, pg, zn);
}
@@ -910,7 +910,7 @@ void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, pg, zn);
}
@@ -928,7 +928,7 @@ void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice, pg, zn);
}
@@ -947,7 +947,7 @@ void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, pg, zn);
}
@@ -967,7 +967,7 @@ void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice, pg, zn);
}
@@ -986,7 +986,7 @@ void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1006,7 +1006,7 @@ void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice, pg, zn);
}
@@ -1025,7 +1025,7 @@ void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1045,7 +1045,7 @@ void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice, pg, zn);
}
@@ -1062,7 +1062,7 @@ void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1080,7 +1080,7 @@ void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[ADD]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
uint32_t slice = slice_base + 15;
SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice, pg, zn);
}
@@ -1099,7 +1099,7 @@ void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1119,7 +1119,7 @@ void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice, pg, zn);
}
@@ -1138,7 +1138,7 @@ void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1158,7 +1158,7 @@ void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice, pg, zn);
}
@@ -1177,7 +1177,7 @@ void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1197,7 +1197,7 @@ void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice, pg, zn);
}
@@ -1216,7 +1216,7 @@ void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1236,7 +1236,7 @@ void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice, pg, zn);
}
@@ -1255,7 +1255,7 @@ void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1275,7 +1275,7 @@ void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[ADD]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
uint32_t slice = slice_base + 7;
SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice, pg, zn);
}
@@ -1294,7 +1294,7 @@ void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1314,7 +1314,7 @@ void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[ADD]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
uint32_t slice = slice_base + 3;
SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice, pg, zn);
}
@@ -1333,7 +1333,7 @@ void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1353,7 +1353,7 @@ void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[ADD]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
uint32_t slice = slice_base + 1;
SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice, pg, zn);
}
@@ -1370,7 +1370,7 @@ void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, pg, zn);
}
@@ -1386,7 +1386,7 @@ void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) {
+void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, pg, zn);
}
@@ -1404,7 +1404,7 @@ void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, pg, zn);
}
@@ -1422,7 +1422,7 @@ void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) {
+void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, pg, zn);
}
@@ -1440,7 +1440,7 @@ void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, pg, zn);
}
@@ -1458,7 +1458,7 @@ void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) {
+void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, pg, zn);
}
@@ -1476,7 +1476,7 @@ void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, pg, zn);
}
@@ -1494,7 +1494,7 @@ void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) {
+void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, pg, zn);
}
@@ -1510,7 +1510,7 @@ void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, pg, zn);
}
@@ -1526,7 +1526,7 @@ void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE]], <vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) {
+void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, pg, zn);
}
@@ -1544,7 +1544,7 @@ void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, pg, zn);
}
@@ -1562,7 +1562,7 @@ void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) {
+void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, pg, zn);
}
@@ -1580,7 +1580,7 @@ void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, pg, zn);
}
@@ -1598,7 +1598,7 @@ void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) {
+void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, pg, zn);
}
@@ -1616,7 +1616,7 @@ void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, pg, zn);
}
@@ -1634,7 +1634,7 @@ void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn)
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) {
+void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, pg, zn);
}
@@ -1652,7 +1652,7 @@ void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t z
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, pg, zn);
}
@@ -1670,7 +1670,7 @@ void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) {
+void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, pg, zn);
}
@@ -1688,7 +1688,7 @@ void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, pg, zn);
}
@@ -1706,7 +1706,7 @@ void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) {
+void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, pg, zn);
}
@@ -1724,7 +1724,7 @@ void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, pg, zn);
}
@@ -1742,7 +1742,7 @@ void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) {
+void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, pg, zn);
}
@@ -1760,7 +1760,7 @@ void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, pg, zn);
}
@@ -1778,7 +1778,7 @@ void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[ZN]])
// CHECK-CXX-NEXT: ret void
//
-void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) {
+void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) __arm_streaming {
SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, pg, zn);
}
//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
index 2de6d9f6877f021..812ada00ceb39b9 100644
--- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
+++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_target.c
@@ -6,20 +6,20 @@
#include <arm_sme_draft_spec_subject_to_change.h>
__attribute__((target("sme")))
-void test_sme(svbool_t pg, void *ptr) {
+void test_sme(svbool_t pg, void *ptr) __arm_streaming {
svld1_hor_za8(0, 0, pg, ptr);
}
__attribute__((target("arch=armv8-a+sme")))
-void test_arch_sme(svbool_t pg, void *ptr) {
+void test_arch_sme(svbool_t pg, void *ptr) __arm_streaming {
svld1_hor_vnum_za32(0, 0, pg, ptr, 0);
}
__attribute__((target("+sme")))
-void test_plus_sme(svbool_t pg, void *ptr) {
+void test_plus_sme(svbool_t pg, void *ptr) __arm_streaming {
svst1_ver_za16(0, 0, pg, ptr);
}
-void undefined(svbool_t pg, void *ptr) {
- svst1_ver_vnum_za64(0, 0, pg, ptr, 0); // expected-error {{'svst1_ver_vnum_za64' needs target feature sme}}
+void undefined(svbool_t pg, void *ptr) __arm_streaming { // expected-error {{function executed in streaming-SVE mode requires 'sme'}}
+ svst1_ver_vnum_za64(0, 0, pg, ptr, 0);
}
More information about the cfe-commits
mailing list