[clang] [Clang][AArch64] Require SVE or SSVE for scalable types. (PR #91356)

Tue May 7 09:30:50 PDT 2024

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Sander de Smalen (sdesmalen-arm)

<details>
<summary>Changes</summary>

Scalable types are only available when:
* The function is compiled with +sve
* The function is compiled with +sme and the function is executed in Streaming-SVE mode.

---

Patch is 73.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91356.diff


19 Files Affected:

- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2) 
- (modified) clang/lib/Sema/Sema.cpp (+8-3) 
- (modified) clang/lib/Sema/SemaDecl.cpp (+11-5) 
- (modified) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c (+8-2) 
- (modified) clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c (+42-36) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c (+12-6) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_create2_bool.c (+10-10) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_get4_bool.c (+12-12) 
- (modified) clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_undef_bool.c (+8-2) 
- (modified) clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c (+2) 
- (modified) clang/test/Sema/aarch64-sme2p1-diagnostics.c (+1-1) 


``````````diff

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 9a0bae9c216de9..72326d4509cd7b 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3205,6 +3205,8 @@ def warn_attribute_arm_zt0_builtin_no_zt0_state : Warning<
   InGroup<DiagGroup<"undefined-arm-zt0">>;
 def err_sve_vector_in_non_sve_target : Error<
   "SVE vector type %0 cannot be used in a target without sve">;
+def err_sve_vector_in_non_streaming_function : Error<
+  "SVE vector type %0 cannot be used in a non-streaming function">;
 def err_attribute_riscv_rvv_bits_unsupported : Error<
   "%0 is only supported when '-mrvv-vector-bits=<bits>' is specified with a "
   "value of \"zvl\" or a power 2 in the range [64,65536]">;
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index a1e32d391ed0cc..92f859b7146c71 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -2056,9 +2056,14 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
     if (Ty->isSVESizelessBuiltinType() && FD && FD->hasBody()) {
       llvm::StringMap<bool> CallerFeatureMap;
       Context.getFunctionFeatureMap(CallerFeatureMap, FD);
-      if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap) &&
-          !Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap))
-        Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty;
+      if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap)) {
+        if (!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap))
+          Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty;
+        else if (!IsArmStreamingFunction(FD, /*IncludeLocallyStreaming=*/true)) {
+          Diag(D->getLocation(), diag::err_sve_vector_in_non_streaming_function)
+              << Ty;
+        }
+      }
     }
   };
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 590f37837eb2df..0d11a2acf2569a 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -8982,11 +8982,17 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
     const FunctionDecl *FD = cast<FunctionDecl>(CurContext);
     llvm::StringMap<bool> CallerFeatureMap;
     Context.getFunctionFeatureMap(CallerFeatureMap, FD);
-    if (!Builtin::evaluateRequiredTargetFeatures(
-        "sve", CallerFeatureMap)) {
-      Diag(NewVD->getLocation(), diag::err_sve_vector_in_non_sve_target) << T;
-      NewVD->setInvalidDecl();
-      return;
+
+    if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap)) {
+      if (!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap)) {
+        Diag(NewVD->getLocation(), diag::err_sve_vector_in_non_sve_target) << T;
+        NewVD->setInvalidDecl();
+      } else if (!IsArmStreamingFunction(FD, /*IncludeLocallyStreaming=*/true)) {
+        Diag(NewVD->getLocation(),
+             diag::err_sve_vector_in_non_streaming_function)
+            << T;
+        NewVD->setInvalidDecl();
+      }
     }
   }
 
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
index b3d5f4a4c4a537..c225c5c6c669e6 100644
--- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_reinterpret_svcount_svbool.c
@@ -9,6 +9,12 @@
 
 #include <arm_sme.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR __arm_streaming_compatible
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.§
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.taarch64.svcountt(target("aarch64.svcount") [[CNT:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) __arm_streaming_compatible
+svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svreinterpret,_b,,)(cnt);
 }
@@ -41,7 +47,7 @@ svbool_t test_svreinterpret_svbool_svcnt(svcount_t cnt) __arm_streaming_compatib
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svreinterpret_svcnt_svbool(svbool_t pg) __arm_streaming_compatible
+svcount_t test_svreinterpret_svcnt_svbool(svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svreinterpret,_c,,)(pg);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c
index 128a7eb102da9e..d2a4e1669a1cfa 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c
@@ -12,6 +12,12 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR __arm_streaming_compatible
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -29,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svrevd_s8_z(svbool_t pg, svint8_t op) {
+svint8_t test_svrevd_s8_z(svbool_t pg, svint8_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s8, _z, )(pg, op);
 }
 
@@ -45,7 +51,7 @@ svint8_t test_svrevd_s8_z(svbool_t pg, svint8_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrevd_s16_z(svbool_t pg, svint16_t op) {
+svint16_t test_svrevd_s16_z(svbool_t pg, svint16_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s16, _z, )(pg, op);
 }
 
@@ -61,7 +67,7 @@ svint16_t test_svrevd_s16_z(svbool_t pg, svint16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevd_s32_z(svbool_t pg, svint32_t op) {
+svint32_t test_svrevd_s32_z(svbool_t pg, svint32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s32, _z, )(pg, op);
 }
 
@@ -77,7 +83,7 @@ svint32_t test_svrevd_s32_z(svbool_t pg, svint32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevd_s64_z(svbool_t pg, svint64_t op) {
+svint64_t test_svrevd_s64_z(svbool_t pg, svint64_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s64, _z, )(pg, op);
 }
 
@@ -91,7 +97,7 @@ svint64_t test_svrevd_s64_z(svbool_t pg, svint64_t op) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svrevd_u8_z(svbool_t pg, svuint8_t op) {
+svuint8_t test_svrevd_u8_z(svbool_t pg, svuint8_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u8, _z, )(pg, op);
 }
 // CHECK-LABEL: @test_svrevd_u16_z(
@@ -106,7 +112,7 @@ svuint8_t test_svrevd_u8_z(svbool_t pg, svuint8_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrevd_u16_z(svbool_t pg, svuint16_t op) {
+svuint16_t test_svrevd_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u16, _z, )(pg, op);
 }
 
@@ -122,7 +128,7 @@ svuint16_t test_svrevd_u16_z(svbool_t pg, svuint16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevd_u32_z(svbool_t pg, svuint32_t op) {
+svuint32_t test_svrevd_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u32, _z, )(pg, op);
 }
 
@@ -138,7 +144,7 @@ svuint32_t test_svrevd_u32_z(svbool_t pg, svuint32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevd_u64_z(svbool_t pg, svuint64_t op) {
+svuint64_t test_svrevd_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u64, _z, )(pg, op);
 }
 
@@ -152,7 +158,7 @@ svuint64_t test_svrevd_u64_z(svbool_t pg, svuint64_t op) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svrevd_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) {
+svint8_t test_svrevd_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s8, _m, )(inactive, pg, op);
 }
 
@@ -168,7 +174,7 @@ svint8_t test_svrevd_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrevd_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) {
+svint16_t test_svrevd_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s16, _m, )(inactive, pg, op);
 }
 
@@ -184,7 +190,7 @@ svint16_t test_svrevd_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevd_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) {
+svint32_t test_svrevd_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s32, _m, )(inactive, pg, op);
 }
 
@@ -200,7 +206,7 @@ svint32_t test_svrevd_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevd_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) {
+svint64_t test_svrevd_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s64, _m, )(inactive, pg, op);
 }
 
@@ -214,7 +220,7 @@ svint64_t test_svrevd_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svrevd_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) {
+svuint8_t test_svrevd_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u8, _m, )(inactive, pg, op);
 }
 
@@ -230,7 +236,7 @@ svuint8_t test_svrevd_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrevd_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) {
+svuint16_t test_svrevd_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u16, _m, )(inactive, pg, op);
 }
 
@@ -246,7 +252,7 @@ svuint16_t test_svrevd_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevd_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) {
+svuint32_t test_svrevd_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u32, _m, )(inactive, pg, op);
 }
 
@@ -262,7 +268,7 @@ svuint32_t test_svrevd_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevd_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) {
+svuint64_t test_svrevd_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u64, _m, )(inactive, pg, op);
 }
 
@@ -276,7 +282,7 @@ svuint64_t test_svrevd_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svrevd_s8_x(svbool_t pg, svint8_t op) {
+svint8_t test_svrevd_s8_x(svbool_t pg, svint8_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s8, _x, )(pg, op);
 }
 
@@ -292,7 +298,7 @@ svint8_t test_svrevd_s8_x(svbool_t pg, svint8_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrevd_s16_x(svbool_t pg, svint16_t op) {
+svint16_t test_svrevd_s16_x(svbool_t pg, svint16_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s16, _x, )(pg, op);
 }
 
@@ -308,7 +314,7 @@ svint16_t test_svrevd_s16_x(svbool_t pg, svint16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevd_s32_x(svbool_t pg, svint32_t op) {
+svint32_t test_svrevd_s32_x(svbool_t pg, svint32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s32, _x, )(pg, op);
 }
 
@@ -324,7 +330,7 @@ svint32_t test_svrevd_s32_x(svbool_t pg, svint32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevd_s64_x(svbool_t pg, svint64_t op) {
+svint64_t test_svrevd_s64_x(svbool_t pg, svint64_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _s64, _x, )(pg, op);
 }
 
@@ -338,7 +344,7 @@ svint64_t test_svrevd_s64_x(svbool_t pg, svint64_t op) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.revd.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svrevd_u8_x(svbool_t pg, svuint8_t op) {
+svuint8_t test_svrevd_u8_x(svbool_t pg, svuint8_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u8, _x, )(pg, op);
 }
 
@@ -354,7 +360,7 @@ svuint8_t test_svrevd_u8_x(svbool_t pg, svuint8_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revd.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrevd_u16_x(svbool_t pg, svuint16_t op) {
+svuint16_t test_svrevd_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u16, _x, )(pg, op);
 }
 
@@ -370,7 +376,7 @@ svuint16_t test_svrevd_u16_x(svbool_t pg, svuint16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) {
+svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u32, _x, )(pg, op);
 }
 
@@ -386,7 +392,7 @@ svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revd.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) {
+svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _u64, _x, )(pg, op);
 }
 
@@ -403,7 +409,7 @@ svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.revd.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) {
+svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _bf16, _z, )(pg, op);
 }
 
@@ -419,7 +425,7 @@ svbfloat16_t test_svrevd_bf16_z(svbool_t pg, svbfloat16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.revd.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) {
+svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _f16, _z, )(pg, op);
 }
 
@@ -435,7 +441,7 @@ svfloat16_t test_svrevd_f16_z(svbool_t pg, svfloat16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.revd.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) {
+svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svrevd, _f32, _z, )(pg, op);
 }
 
@@ -451,7 +457,7 @@ svfloat32_t test_svrevd_f32_z(svbool_t pg, svfloat32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.revd.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrevd_f64_z(svbool_t pg, svfloat64_t op) {
+svfloat64_t test_svre...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/91356