[clang] [Clang][AArch64] Add diagnostics for builtins that use ZT0. (PR #79140)
Sander de Smalen via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 23 06:04:10 PST 2024
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/79140
Similar to what we did for ZA, this patch adds diagnostics to flag when using a ZT0 builtin in a function that does not have ZT0 state.
>From 7bd053d35ad114a2f95281abd22a9c4e963cca36 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 16 Jan 2024 09:43:32 +0000
Subject: [PATCH] [Clang][AArch64] Add diagnostics for builtins that use ZT0.
Similar to what we did for ZA, this patch adds diagnostics to flag
when using a ZT0 builtin in a function that does not have ZT0 state.
---
.../clang/Basic/DiagnosticSemaKinds.td | 3 +++
clang/include/clang/Basic/arm_sme.td | 18 ++++++++--------
clang/include/clang/Basic/arm_sve_sme_incl.td | 3 +++
clang/lib/Sema/SemaChecking.cpp | 17 +++++++++++++++
.../acle_sme2_ldr_str_zt.c | 4 ++--
.../acle_sme2_luti2_lane_zt.c | 18 ++++++++--------
.../acle_sme2_luti2_lane_zt_x2.c | 18 ++++++++--------
.../acle_sme2_luti2_lane_zt_x4.c | 18 ++++++++--------
.../acle_sme2_luti4_lane_zt.c | 18 ++++++++--------
.../acle_sme2_luti4_lane_zt_x2.c | 18 ++++++++--------
.../acle_sme2_luti4_lane_zt_x4.c | 14 ++++++-------
.../acle_sme2_zero_zt.c | 2 +-
.../Sema/aarch64-incompat-sm-builtin-calls.c | 10 ++++++++-
.../aarch64-sme2-intrinsics/acle_sme2_imm.cpp | 16 +++++++-------
clang/utils/TableGen/SveEmitter.cpp | 21 ++++++++++++++++---
15 files changed, 122 insertions(+), 76 deletions(-)
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index e027e754477fcf4..a1c32abb4dcd880 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3170,6 +3170,9 @@ def warn_attribute_arm_sm_incompat_builtin : Warning<
def warn_attribute_arm_za_builtin_no_za_state : Warning<
"builtin call is not valid when calling from a function without active ZA state">,
InGroup<DiagGroup<"undefined-arm-za">>;
+def warn_attribute_arm_zt0_builtin_no_zt0_state : Warning<
+ "builtin call is not valid when calling from a function without active ZT0 state">,
+ InGroup<DiagGroup<"undefined-arm-zt0">>;
def err_sve_vector_in_non_sve_target : Error<
"SVE vector type %0 cannot be used in a target without sve">;
def err_attribute_riscv_rvv_bits_unsupported : Error<
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 4fb50b8e4e4e565..695e1bddf9ffc61 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -636,37 +636,37 @@ let TargetGuard = "sme2,sme-i16i64" in {
// Spill and fill of ZT0
//
let TargetGuard = "sme2" in {
- def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<0, ImmCheck0_0>]>;
- def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<0, ImmCheck0_0>]>;
+ def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsInOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
+ def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
}
//
// Zero ZT0
//
let TargetGuard = "sme2" in {
- def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<0, ImmCheck0_0>]>;
+ def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsOutZT0], [ImmCheck<0, ImmCheck0_0>]>;
}
//
// lookup table expand four contiguous registers
//
let TargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
- def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>;
+ def SVLUTI2_LANE_ZT_X4 : Inst<"svluti2_lane_zt_{d}_x4", "4.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLUTI4_LANE_ZT_X4 : Inst<"svluti4_lane_zt_{d}_x4", "4.di[i", "sUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_1>]>;
}
//
// lookup table expand one register
//
let TargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
- def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLUTI2_LANE_ZT : Inst<"svluti2_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>;
+ def SVLUTI4_LANE_ZT : Inst<"svluti4_lane_zt_{d}", "di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
}
//
// lookup table expand two contiguous registers
//
let TargetGuard = "sme2" in {
- def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
- def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
+ def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
+ def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
}
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index e6ad84676053b5c..9a6ea9898ef702c 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -229,6 +229,9 @@ def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrin
def IsInZA : FlagType<0x80000000000>;
def IsOutZA : FlagType<0x100000000000>;
def IsInOutZA : FlagType<0x200000000000>;
+def IsInZT0 : FlagType<0x400000000000>;
+def IsOutZT0 : FlagType<0x800000000000>;
+def IsInOutZT0 : FlagType<0x1000000000000>;
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
class ImmCheckType<int val> {
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 1f83dcf07b6f9e5..7833d5a2ea20eec 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3013,6 +3013,11 @@ enum ArmSMEState : unsigned {
ArmOutZA = 0b10,
ArmInOutZA = 0b11,
ArmZAMask = 0b11,
+
+ ArmInZT0 = 0b01 << 2,
+ ArmOutZT0 = 0b10 << 2,
+ ArmInOutZT0 = 0b11 << 2,
+ ArmZT0Mask = 0b11 << 2
};
bool Sema::ParseSVEImmChecks(
@@ -3206,6 +3211,13 @@ static bool hasArmZAState(const FunctionDecl *FD) {
(FD->hasAttr<ArmNewAttr>() && FD->getAttr<ArmNewAttr>()->isNewZA());
}
+static bool hasArmZT0State(const FunctionDecl *FD) {
+ const auto *T = FD->getType()->getAs<FunctionProtoType>();
+ return (T && FunctionType::getArmZT0State(T->getAArch64SMEAttributes()) !=
+ FunctionType::ARM_None) ||
+ (FD->hasAttr<ArmNewAttr>() && FD->getAttr<ArmNewAttr>()->isNewZT0());
+}
+
static ArmSMEState getSMEState(unsigned BuiltinID) {
switch (BuiltinID) {
default:
@@ -3233,6 +3245,11 @@ bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
Diag(TheCall->getBeginLoc(),
diag::warn_attribute_arm_za_builtin_no_za_state)
<< TheCall->getSourceRange();
+
+ if ((getSMEState(BuiltinID) & ArmZT0Mask) && !hasArmZT0State(FD))
+ Diag(TheCall->getBeginLoc(),
+ diag::warn_attribute_arm_zt0_builtin_no_zt0_state)
+ << TheCall->getSourceRange();
}
// Range check SME intrinsics that take immediate values.
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c
index 1a495d3b117ecc1..3e4454d943358c8 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_ldr_str_zt.c
@@ -20,7 +20,7 @@
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr.zt(i32 0, ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
-void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_out("za") {
+void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_out("zt0") {
svldr_zt(0, base);
}
@@ -36,6 +36,6 @@ void test_svldr_zt(const void *base) __arm_streaming_compatible __arm_out("za")
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str.zt(i32 0, ptr [[BASE:%.*]])
// CPP-CHECK-NEXT: ret void
//
-void test_svstr_zt(void *base) __arm_streaming_compatible __arm_in("za") {
+void test_svstr_zt(void *base) __arm_streaming_compatible __arm_in("zt0") {
svstr_zt(0, base);
}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c
index d656178fdf7a127..7c210fbe6923e92 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt.c
@@ -19,7 +19,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u8(0, zn, 15);
}
@@ -34,7 +34,7 @@ svuint8_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti2.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s8(0, zn, 15);
}
@@ -48,7 +48,7 @@ svint8_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u16(0, zn, 15);
}
@@ -63,7 +63,7 @@ svuint16_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti2.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s16(0, zn, 15);
}
@@ -77,7 +77,7 @@ svint16_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.luti2.lane.zt.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f16(0, zn, 15);
}
@@ -91,7 +91,7 @@ svfloat16_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.luti2.lane.zt.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_bf16(0, zn, 15);
}
@@ -105,7 +105,7 @@ svbfloat16_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("z
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u32(0, zn, 15);
}
@@ -119,7 +119,7 @@ svuint32_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti2.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s32(0, zn, 15);
}
@@ -133,6 +133,6 @@ svint32_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.luti2.lane.zt.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 15)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-svfloat32_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat32_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f32(0, zn, 15);
}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
index 60cd24fdf4630f6..d7ef75ce01dd70d 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x2.c
@@ -26,7 +26,7 @@
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
//
-svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u8_x2(0, zn, 7);
}
@@ -49,7 +49,7 @@ svuint8x2_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
//
-svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s8_x2(0, zn, 7);
}
@@ -71,7 +71,7 @@ svint8x2_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]]
//
-svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u16_x2(0, zn, 7);
}
@@ -94,7 +94,7 @@ svuint16x2_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]]
//
-svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s16_x2(0, zn, 7);
}
@@ -116,7 +116,7 @@ svint16x2_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP4]]
//
-svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f16_x2(0, zn, 7);
}
@@ -138,7 +138,7 @@ svfloat16x2_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP4]]
//
-svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_bf16_x2(0, zn, 7);
}
@@ -160,7 +160,7 @@ svbfloat16x2_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP4]]
//
-svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u32_x2(0, zn, 7);
}
@@ -182,7 +182,7 @@ svuint32x2_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP4]]
//
-svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s32_x2(0, zn, 7);
}
@@ -204,6 +204,6 @@ svint32x2_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]]
//
-svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat32x2_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f32_x2(0, zn, 7);
}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c
index e05748c8a64244f..f65c0ef61f81871 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti2_lane_zt_x4.c
@@ -34,7 +34,7 @@
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
// CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
//
-svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u8_x4(0, zn, 3);
}
@@ -65,7 +65,7 @@ svuint8x4_t test_svluti2_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
// CPP-CHECK-NEXT: ret <vscale x 64 x i8> [[TMP8]]
//
-svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s8_x4(0, zn, 3);
}
@@ -95,7 +95,7 @@ svint8x4_t test_svluti2_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP8]]
//
-svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u16_x4(0, zn, 3);
}
@@ -125,7 +125,7 @@ svuint16x4_t test_svluti2_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP8]]
//
-svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s16_x4(0, zn, 3);
}
@@ -155,7 +155,7 @@ svint16x4_t test_svluti2_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret <vscale x 32 x half> [[TMP8]]
//
-svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f16_x4(0, zn, 3);
}
@@ -185,7 +185,7 @@ svfloat16x4_t test_svluti2_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP8]]
//
-svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_bf16_x4(0, zn, 3);
}
@@ -215,7 +215,7 @@ svbfloat16x4_t test_svluti2_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]]
//
-svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_u32_x4(0, zn, 3);
}
@@ -245,7 +245,7 @@ svuint32x4_t test_svluti2_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]]
//
-svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_s32_x4(0, zn, 3);
}
@@ -275,6 +275,6 @@ svint32x4_t test_svluti2_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret <vscale x 16 x float> [[TMP8]]
//
-svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat32x4_t test_svluti2_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti2_lane_zt_f32_x4(0, zn, 3);
}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c
index 1e303a9a661d7f5..cbab1cc8e81bd45 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt.c
@@ -19,7 +19,7 @@
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u8(0, zn, 7);
}
@@ -34,7 +34,7 @@ svuint8_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sme.luti4.lane.zt.nxv16i8(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
-svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s8(0, zn, 7);
}
@@ -48,7 +48,7 @@ svint8_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") {
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u16(0, zn, 7);
}
@@ -62,7 +62,7 @@ svuint16_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sme.luti4.lane.zt.nxv8i16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
//
-svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s16(0, zn, 7);
}
@@ -76,7 +76,7 @@ svint16_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sme.luti4.lane.zt.nxv8f16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
//
-svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f16(0, zn, 7);
}
@@ -90,7 +90,7 @@ svfloat16_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sme.luti4.lane.zt.nxv8bf16(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 8 x bfloat> [[TMP0]]
//
-svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_bf16(0, zn, 7);
}
@@ -104,7 +104,7 @@ svbfloat16_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("z
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u32(0, zn, 7);
}
@@ -118,7 +118,7 @@ svuint32_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sme.luti4.lane.zt.nxv4i32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
//
-svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s32(0, zn, 7);
}
@@ -132,6 +132,6 @@ svint32_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sme.luti4.lane.zt.nxv4f32(i32 0, <vscale x 16 x i8> [[ZN:%.*]], i32 7)
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
//
-svfloat32_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat32_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f32(0, zn, 7);
}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c
index 3544f62527aca96..f7f16281ff4061a 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x2.c
@@ -26,7 +26,7 @@
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
//
-svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u8_x2(0, zn, 3);
}
@@ -49,7 +49,7 @@ svuint8x2_t test_svluti4_lane_zt_u8(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
// CPP-CHECK-NEXT: ret <vscale x 32 x i8> [[TMP4]]
//
-svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s8_x2(0, zn, 3);
}
@@ -71,7 +71,7 @@ svint8x2_t test_svluti4_lane_zt_s8(svuint8_t zn) __arm_streaming __arm_in("za")
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]]
//
-svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u16_x2(0, zn, 3);
}
@@ -94,7 +94,7 @@ svuint16x2_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x i16> [[TMP4]]
//
-svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s16_x2(0, zn, 3);
}
@@ -116,7 +116,7 @@ svint16x2_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x half> [[TMP4]]
//
-svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f16_x2(0, zn, 3);
}
@@ -138,7 +138,7 @@ svfloat16x2_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], i64 8)
// CPP-CHECK-NEXT: ret <vscale x 16 x bfloat> [[TMP4]]
//
-svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_bf16_x2(0, zn, 3);
}
@@ -160,7 +160,7 @@ svbfloat16x2_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP4]]
//
-svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u32_x2(0, zn, 3);
}
@@ -182,7 +182,7 @@ svuint32x2_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x i32> [[TMP4]]
//
-svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s32_x2(0, zn, 3);
}
@@ -204,6 +204,6 @@ svint32x2_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], i64 4)
// CPP-CHECK-NEXT: ret <vscale x 8 x float> [[TMP4]]
//
-svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat32x2_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f32_x2(0, zn, 3);
}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
index e7151e82560390d..3fedfdc33089329 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_luti4_lane_zt_x4.c
@@ -36,7 +36,7 @@
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP8]]
//
-svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u16_x4(0, zn, 1);
}
@@ -68,7 +68,7 @@ svuint16x4_t test_svluti4_lane_zt_u16(svuint8_t zn) __arm_streaming __arm_in("za
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret <vscale x 32 x half> [[TMP8]]
//
-svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f16_x4(0, zn, 1);
}
@@ -100,7 +100,7 @@ svfloat16x4_t test_svluti4_lane_zt_f16(svuint8_t zn) __arm_streaming __arm_in("z
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret <vscale x 32 x bfloat> [[TMP8]]
//
-svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_bf16_x4(0, zn, 1);
}
@@ -132,7 +132,7 @@ svbfloat16x4_t test_svluti4_lane_zt_bf16(svuint8_t zn) __arm_streaming __arm_in(
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 24)
// CPP-CHECK-NEXT: ret <vscale x 32 x i16> [[TMP8]]
//
-svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s16_x4(0, zn, 1);
}
@@ -164,7 +164,7 @@ svint16x4_t test_svluti4_lane_zt_s16(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]]
//
-svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_u32_x4(0, zn, 1);
}
@@ -196,7 +196,7 @@ svuint32x4_t test_svluti4_lane_zt_u32(svuint8_t zn) __arm_streaming __arm_in("za
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret <vscale x 16 x i32> [[TMP8]]
//
-svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_s32_x4(0, zn, 1);
}
@@ -228,6 +228,6 @@ svint32x4_t test_svluti4_lane_zt_s32(svuint8_t zn) __arm_streaming __arm_in("za"
// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 12)
// CPP-CHECK-NEXT: ret <vscale x 16 x float> [[TMP8]]
//
-svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("za") {
+svfloat32x4_t test_svluti4_lane_zt_f32(svuint8_t zn) __arm_streaming __arm_in("zt0") {
return svluti4_lane_zt_f32_x4(0, zn, 1);
}
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
index 4a038ce61e3bfef..4105cc3e78ec29f 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
@@ -18,6 +18,6 @@
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.zt(i32 0)
// CPP-CHECK-NEXT: ret void
//
-void test_svzero_zt(void) __arm_streaming_compatible __arm_out("za") {
+void test_svzero_zt(void) __arm_streaming_compatible __arm_out("zt0") {
svzero_zt(0);
}
diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
index 079cff5a5bbae95..55c97c73e8b6952 100644
--- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
+++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c
@@ -1,6 +1,6 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve \
-// RUN: -target-feature +sme -target-feature +sve2 -target-feature +neon -fsyntax-only -verify %s
+// RUN: -target-feature +sme2 -target-feature +sve2 -target-feature +neon -fsyntax-only -verify %s
// REQUIRES: aarch64-registered-target
@@ -108,3 +108,11 @@ svint8_t new_za(svint8_t zd, svbool_t pg, uint32_t slice_base) __arm_streaming {
// expected-no-warning
return svread_hor_za8_s8_m(zd, pg, 0, slice_base);
}
+
+void missing_zt0(void) __arm_streaming {
+ // expected-warning at +1 {{builtin call is not valid when calling from a function without active ZT0 state}}
+ svzero_zt(0);
+}
+
+__arm_new("zt0")
+void new_zt0(void) __arm_streaming { svzero_zt(0); } // no warning
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 0d9f090637eeca0..a627ef9c01ae2dc 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -71,15 +71,15 @@ void test_outer_product(svbool_t pred, svint16_t s16, svuint16_t u16, svint32_t
svbmops_za32_s32_m(4, pred, pred, s32, s32); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}
-void test_ldr_zt(const void *const_base) __arm_streaming_compatible __arm_inout("za") {
+void test_ldr_zt(const void *const_base) __arm_streaming_compatible __arm_inout("zt0") {
svldr_zt(1, const_base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
}
-void test_str_zt(void *base) __arm_streaming_compatible __arm_in("za") {
+void test_str_zt(void *base) __arm_streaming_compatible __arm_in("zt0") {
svstr_zt(1, base); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
}
-void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") {
+void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("zt0") {
// Test Reg Offset
svluti2_lane_zt_u8_x4(1, zn, 0); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Test index value range
@@ -106,7 +106,7 @@ void test_svluti2_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") {
svluti2_lane_zt_f32_x4(0, zn, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
}
-void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") {
+void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("zt0") {
// Test Reg Offset
svluti4_lane_zt_u16_x4(1, zn, 0); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Test index value range
@@ -129,7 +129,7 @@ void test_svluti4_lane_zt_x4(svuint8_t zn) __arm_streaming __arm_in("za") {
svluti4_lane_zt_f32_x4(0, zn, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
}
-void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") {
+void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("zt0") {
// Test Reg Offset
svluti2_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Test index value range
@@ -156,7 +156,7 @@ void test_svluti2_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") {
svluti2_lane_zt_f32(0, zn_u8, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
}
-void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") {
+void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("zt0") {
// Test Reg Offset
svluti4_lane_zt_u8(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Test index value range
@@ -183,7 +183,7 @@ void test_svluti4_lane_zt(svuint8_t zn_u8) __arm_streaming __arm_in("za") {
svluti4_lane_zt_f32(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
}
-void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("za") {
+void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("zt0") {
// Test Reg Offset
svluti2_lane_zt_u8_x2(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Test index value range
@@ -210,7 +210,7 @@ void test_svluti2_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("za") {
svluti2_lane_zt_f32_x2(0, zn_u8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
}
-void test_svluti4_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("za") {
+void test_svluti4_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_in("zt0") {
// Test Reg Offset
svluti4_lane_zt_u8_x2(1, zn_u8, 2); // expected-error {{argument value 1 is outside the valid range [0, 0]}}
// Test index value range
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index fbedd27bf998df6..174304f09007bfe 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1722,12 +1722,27 @@ void SVEEmitter::createBuiltinZAState(raw_ostream &OS) {
std::map<std::string, std::set<std::string>> IntrinsicsPerState;
for (auto &Def : Defs) {
+ std::string Key;
+ auto AddToKey = [&Key](const std::string &S) -> void {
+ Key = Key.empty() ? S : (Key + " | " + S);
+ };
+
if (Def->isFlagSet(getEnumValueForFlag("IsInZA")))
- IntrinsicsPerState["ArmInZA"].insert(Def->getMangledName());
+ AddToKey("ArmInZA");
else if (Def->isFlagSet(getEnumValueForFlag("IsOutZA")))
- IntrinsicsPerState["ArmOutZA"].insert(Def->getMangledName());
+ AddToKey("ArmOutZA");
else if (Def->isFlagSet(getEnumValueForFlag("IsInOutZA")))
- IntrinsicsPerState["ArmInOutZA"].insert(Def->getMangledName());
+ AddToKey("ArmInOutZA");
+
+ if (Def->isFlagSet(getEnumValueForFlag("IsInZT0")))
+ AddToKey("ArmInZT0");
+ else if (Def->isFlagSet(getEnumValueForFlag("IsOutZT0")))
+ AddToKey("ArmOutZT0");
+ else if (Def->isFlagSet(getEnumValueForFlag("IsInOutZT0")))
+ AddToKey("ArmInOutZT0");
+
+ if (!Key.empty())
+ IntrinsicsPerState[Key].insert(Def->getMangledName());
}
OS << "#ifdef GET_SME_BUILTIN_GET_STATE\n";
More information about the cfe-commits
mailing list