[clang] Revert "[Clang][SME] Add IsStreamingOrSVE2p1" (PR #76973)

Thu Jan 4 08:53:14 PST 2024

https://github.com/SamTebbs33 created https://github.com/llvm/llvm-project/pull/76973

Reverts llvm/llvm-project#75958

I mistakenly included a commit from my local main after rebasing.

>From d63f848e75b66ae2e5915ada4bd5b370e05f9829 Mon Sep 17 00:00:00 2001
From: Sam Tebbs <sam at tebbs.me>
Date: Thu, 4 Jan 2024 16:52:31 +0000
Subject: [PATCH] Revert "[Clang][SME] Add IsStreamingOrSVE2p1 (#75958)"

This reverts commit 8f8152091cae186b35f73331df13f36b0f905eb4.
---
 clang/include/clang/Basic/arm_sve.td          | 123 ++++++++--------
 clang/include/clang/Basic/arm_sve_sme_incl.td |   1 -
 clang/lib/Basic/Targets/AArch64.h             |   1 -
 clang/lib/Sema/SemaChecking.cpp               |  17 +--
 .../aarch64-sve2-intrinsics/acle_sve2_revd.c  |   4 +-
 .../acle_sve2p1_bfmlsl.c                      |   8 +-
 .../acle_sve2p1_cntp.c                        |  27 ++--
 .../acle_sve2p1_fclamp.c                      |  14 +-
 .../acle_sve2p1_ld1.c                         |   2 -
 .../acle_sve2p1_pext.c                        |   5 +-
 .../acle_sve2p1_pfalse.c                      |  14 +-
 .../acle_sve2p1_psel.c                        |  26 +---
 .../acle_sve2p1_sclamp.c                      |  20 +--
 .../acle_sve2p1_stnt1.c                       |   2 +
 .../acle_sve2p1_uclamp.c                      |  20 +--
 .../acle_sve2p1_while_pn.c                    | 137 +++++++++---------
 .../aarch64-sme2-intrinsics/acle_sme2_imm.cpp |   2 +-
 .../Sema/aarch64-sme2-sve2p1-diagnostics.c    |  37 -----
 clang/utils/TableGen/SveEmitter.cpp           |   3 -
 19 files changed, 174 insertions(+), 289 deletions(-)
 delete mode 100644 clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 17ee82d9304ffe..91f62c4c76339d 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1296,9 +1296,9 @@ def SVCREATE_3_BF16 : SInst<"svcreate3[_{d}]", "3ddd",  "b", MergeNone, "", [IsT
 def SVCREATE_4_BF16 : SInst<"svcreate4[_{d}]", "4dddd", "b", MergeNone, "", [IsTupleCreate]>;
 }
 
-let TargetGuard = "sve2p1|sme2" in {
-  def SVCREATE_2_B : SInst<"svcreate2[_{d}]", "2dd",   "Pc", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
-  def SVCREATE_4_B : SInst<"svcreate4[_{d}]", "4dddd", "Pc", MergeNone, "", [IsTupleCreate, IsStreamingCompatible]>;
+let TargetGuard = "sve2p1" in {
+  def SVCREATE_2_B : SInst<"svcreate2[_{d}]", "2dd",   "Pc", MergeNone, "", [IsTupleCreate]>;
+  def SVCREATE_4_B : SInst<"svcreate4[_{d}]", "4dddd", "Pc", MergeNone, "", [IsTupleCreate]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1321,7 +1321,7 @@ def SVSET_3_BF16 : SInst<"svset3[_{d}]", "33id", "b", MergeNone, "", [IsTupleSet
 def SVSET_4_BF16 : SInst<"svset4[_{d}]", "44id", "b", MergeNone, "", [IsTupleSet], [ImmCheck<1, ImmCheck0_3>]>;
 }
 
-let TargetGuard = "sve2p1|sme2" in {
+let TargetGuard = "sve2p1" in {
   def SVGET_2_B : SInst<"svget2[_{d}]", "d2i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_1>]>;
   def SVGET_4_B : SInst<"svget4[_{d}]", "d4i", "Pc", MergeNone, "", [IsTupleGet], [ImmCheck<1, ImmCheck0_3>]>;
 
@@ -1976,37 +1976,39 @@ def SVFMINQV: SInst<"svminqv[_{d}]", "{Pd", "hfd", MergeNone, "aarch64_sve_fminq
 }
 
 let TargetGuard = "sve2p1|sme2" in {
-def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_3>]>;
-def SVPEXT_X2     : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck0_1>]>;
+//FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available
+def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>;
+def SVPEXT_X2     : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>;
 
-def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}[_{1}]",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
-def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}[_{1}]",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
-def SVWHILELE_COUNT  : SInst<"svwhilele_{d}[_{1}]",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
-def SVWHILELT_COUNT  : SInst<"svwhilelt_{d}[_{1}]",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
-def SVWHILELO_COUNT  : SInst<"svwhilelt_{d}[_{1}]",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
-def SVWHILELS_COUNT  : SInst<"svwhilele_{d}[_{1}]",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
-def SVWHILEHI_COUNT  : SInst<"svwhilegt_{d}[_{1}]",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
-def SVWHILEHS_COUNT  : SInst<"svwhilege_{d}[_{1}]",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+def SVWHILEGE_COUNT  : SInst<"svwhilege_{d}[_{1}]",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+def SVWHILEGT_COUNT  : SInst<"svwhilegt_{d}[_{1}]",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+def SVWHILELE_COUNT  : SInst<"svwhilele_{d}[_{1}]",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+def SVWHILELT_COUNT  : SInst<"svwhilelt_{d}[_{1}]",  "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+def SVWHILELO_COUNT  : SInst<"svwhilelt_{d}[_{1}]",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+def SVWHILELS_COUNT  : SInst<"svwhilele_{d}[_{1}]",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+def SVWHILEHI_COUNT  : SInst<"svwhilegt_{d}[_{1}]",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
+def SVWHILEHS_COUNT  : SInst<"svwhilege_{d}[_{1}]",  "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>;
 }
 
 multiclass MultiVecLoad<string i> {
-  def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc",   [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif",  [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld",  [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc",   [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif",  [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld",  [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-
-  def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc",   [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif",  [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld",  [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc",   [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif",  [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld",  [IsStructLoad, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  // FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available (SME2 requires __arm_streaming)
+  def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc",   [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif",  [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld",  [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc",   [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif",  [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld",  [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+
+  def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc",   [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif",  [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld",  [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc",   [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif",  [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld",  [IsStructLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
 }
 
 let TargetGuard = "sve2p1|sme2" in {
@@ -2015,23 +2017,24 @@ let TargetGuard = "sve2p1|sme2" in {
 }
 
 multiclass MultiVecStore<string i> {
-  def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc",   [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif",  [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld",  [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc",   [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif",  [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld",  [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-
-  def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc",   [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif",  [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld",  [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
-  def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc",   [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif",  [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
-  def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld",  [IsStructStore, IsStreamingOrSVE2p1], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  // FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available (SME2 requires __arm_streaming)
+  def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc",   [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif",  [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld",  [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc",   [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif",  [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld",  [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+
+  def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc",   [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif",  [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld",  [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
+  def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc",   [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif",  [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
+  def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld",  [IsStructStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
 }
 
 let TargetGuard = "sve2p1|sme2" in {
@@ -2048,20 +2051,21 @@ def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f",  MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 }
 
-let TargetGuard = "sve2p1|sme2" in {
-def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil",     MergeNone, "aarch64_sve_sclamp", [IsStreamingOrSVE2p1], []>;
-def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [IsStreamingOrSVE2p1], []>;
+let TargetGuard = "sve2p1|sme" in {
+def SVSCLAMP : SInst<"svclamp[_{d}]", "dddd", "csil",     MergeNone, "aarch64_sve_sclamp", [], []>;
+def SVUCLAMP : SInst<"svclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp", [], []>;
 
 defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUlbhfd", "aarch64_sve_revd">;
 }
 
 let TargetGuard = "sve2p1|sme2" in {
-  def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], []>;
+  //FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available
+  def SVPTRUE_COUNT  : SInst<"svptrue_{d}", "}v", "QcQsQiQl", MergeNone, "aarch64_sve_ptrue_{d}", [IsOverloadNone, IsStreamingCompatible], []>;
 
-  def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, IsStreamingOrSVE2p1]>;
+  def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>;
 
-  def SVFCLAMP   : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [IsStreamingOrSVE2p1], []>;
-  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
+  def SVFCLAMP   : SInst<"svclamp[_{d}]", "dddd", "hfd", MergeNone, "aarch64_sve_fclamp", [IsStreamingCompatible], []>;
+  def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 }
 
 let TargetGuard = "(sve2|sme2),b16b16" in {
@@ -2322,9 +2326,10 @@ let TargetGuard = "sme2" in {
 
 let TargetGuard = "sve2p1|sme2" in {
 // == BFloat16 multiply-subtract ==
-  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, IsStreamingOrSVE2p1], []>;
-  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone, IsStreamingOrSVE2p1], []>;
+// FIXME: Make all of these IsStreamingOrSVE2p1 once that is added
+  def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone, IsStreamingCompatible], []>;
+  def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone, IsStreamingCompatible], []>;
 
-  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<3, ImmCheck0_7>]>;
-  def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, IsStreamingOrSVE2p1], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_7>]>;
+  def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_7>]>;
 }
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index ad29864440c96f..0dba8493bad2d6 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -227,7 +227,6 @@ def IsPreservesZA             : FlagType<0x10000000000>;
 def IsReadZA                  : FlagType<0x20000000000>;
 def IsWriteZA                 : FlagType<0x40000000000>;
 def IsReductionQV             : FlagType<0x80000000000>;
-def IsStreamingOrSVE2p1       : FlagType<0x80000000000>; // Use for intrinsics that are common between sme/sme2 and sve2p1.
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index e38fa5af56598d..f0e0782e7abe97 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -50,7 +50,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool HasMatMul = false;
   bool HasBFloat16 = false;
   bool HasSVE2 = false;
-  bool HasSVE2p1 = false;
   bool HasSVE2AES = false;
   bool HasSVE2SHA3 = false;
   bool HasSVE2SM4 = false;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f13164dc063866..3168d38dd66c36 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2998,12 +2998,7 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context,
   llvm_unreachable("Invalid NeonTypeFlag!");
 }
 
-enum ArmStreamingType {
-  ArmNonStreaming,
-  ArmStreaming,
-  ArmStreamingCompatible,
-  ArmStreamingOrSVE2p1
-};
+enum ArmStreamingType { ArmNonStreaming, ArmStreaming, ArmStreamingCompatible };
 
 bool Sema::ParseSVEImmChecks(
     CallExpr *TheCall, SmallVector<std::tuple<int, int, int>, 3> &ImmChecks) {
@@ -3161,16 +3156,6 @@ static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall,
                                      const FunctionDecl *FD,
                                      ArmStreamingType BuiltinType) {
   ArmStreamingType FnType = getArmStreamingFnType(FD);
-  if (BuiltinType == ArmStreamingOrSVE2p1) {
-    // Check intrinsics that are available in [sve2p1 or sme/sme2].
-    llvm::StringMap<bool> CallerFeatureMap;
-    S.Context.getFunctionFeatureMap(CallerFeatureMap, FD);
-    if (Builtin::evaluateRequiredTargetFeatures("sve2p1", CallerFeatureMap))
-      BuiltinType = ArmStreamingCompatible;
-    else
-      BuiltinType = ArmStreaming;
-  }
-
   if (FnType == ArmStreaming && BuiltinType == ArmNonStreaming) {
     S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin)
         << TheCall->getSourceRange() << "streaming";
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c
index d82d69442b8ffb..74a90583a173a5 100644
--- a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
-// RUN:   -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN:   -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
 // RUN:   -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
@@ -9,7 +9,7 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu \
 // RUN:   -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
index 22d951c069bc8a..c1d14e16ad17b6 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -2,20 +2,20 @@
 // REQUIRES: aarch64-registered-target
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve -target-feature -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
 #ifndef TEST_SME2
 #define ATTR
 #else
-#define ATTR __arm_streaming
+#define ATTR __arm_streaming_compatible
 #endif
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
index 9bf55eaa6a08bf..56b1d992622145 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_cntp.c
@@ -3,19 +3,10 @@
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
-#ifndef TEST_SME2
-#define ATTR
-#else
-#define ATTR __arm_streaming
-#endif
-
 // CHECK-LABEL: @test_svcntp_c8_vlx2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") [[PNN:%.*]], i32 2)
@@ -26,7 +17,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") [[PNN:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_c8_vlx2(svcount_t pnn) ATTR {
+uint64_t test_svcntp_c8_vlx2(svcount_t pnn) {
   return svcntp_c8(pnn, 2);
 }
 
@@ -40,7 +31,7 @@ uint64_t test_svcntp_c8_vlx2(svcount_t pnn) ATTR {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") [[PNN:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_c8_vlx4(svcount_t pnn) ATTR {
+uint64_t test_svcntp_c8_vlx4(svcount_t pnn) {
   return svcntp_c8(pnn, 4);
 }
 
@@ -54,7 +45,7 @@ uint64_t test_svcntp_c8_vlx4(svcount_t pnn) ATTR {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") [[PNN:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_c16_vlx2(svcount_t pnn) ATTR {
+uint64_t test_svcntp_c16_vlx2(svcount_t pnn) {
   return svcntp_c16(pnn, 2);
 }
 
@@ -68,7 +59,7 @@ uint64_t test_svcntp_c16_vlx2(svcount_t pnn) ATTR {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c16(target("aarch64.svcount") [[PNN:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_c16_vlx4(svcount_t pnn) ATTR {
+uint64_t test_svcntp_c16_vlx4(svcount_t pnn) {
   return svcntp_c16(pnn, 4);
 }
 
@@ -82,7 +73,7 @@ uint64_t test_svcntp_c16_vlx4(svcount_t pnn) ATTR {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") [[PNN:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_c32_vlx2(svcount_t pnn) ATTR {
+uint64_t test_svcntp_c32_vlx2(svcount_t pnn) {
   return svcntp_c32(pnn, 2);
 }
 
@@ -96,7 +87,7 @@ uint64_t test_svcntp_c32_vlx2(svcount_t pnn) ATTR {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c32(target("aarch64.svcount") [[PNN:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_c32_vlx4(svcount_t pnn) ATTR {
+uint64_t test_svcntp_c32_vlx4(svcount_t pnn) {
   return svcntp_c32(pnn, 4);
 }
 
@@ -110,7 +101,7 @@ uint64_t test_svcntp_c32_vlx4(svcount_t pnn) ATTR {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") [[PNN:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_c64_vlx2(svcount_t pnn) ATTR {
+uint64_t test_svcntp_c64_vlx2(svcount_t pnn) {
   return svcntp_c64(pnn, 2);
 }
 
@@ -124,6 +115,6 @@ uint64_t test_svcntp_c64_vlx2(svcount_t pnn) ATTR {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.c64(target("aarch64.svcount") [[PNN:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_c64_vlx4(svcount_t pnn) ATTR {
+uint64_t test_svcntp_c64_vlx4(svcount_t pnn) {
   return svcntp_c64(pnn, 4);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
index 7687257701a6e2..5d8c5b7b8a18c6 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_fclamp.c
@@ -11,16 +11,10 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sve \
-// RUN:   -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
-#ifndef TEST_SME2
-#define ATTR
-#else
-#define ATTR __arm_streaming
-#endif
-
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -38,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fclamp.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) ATTR {
+svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) {
   return SVE_ACLE_FUNC(svclamp, _f16, , )(op1, op2, op3);
 }
 
@@ -52,7 +46,7 @@ svfloat16_t test_svclamp_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fclamp.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) ATTR {
+svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) {
   return SVE_ACLE_FUNC(svclamp, _f32, , )(op1, op2, op3);
 }
 
@@ -66,7 +60,7 @@ svfloat32_t test_svclamp_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fclamp.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svclamp_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) ATTR {
+svfloat64_t test_svclamp_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) {
   return SVE_ACLE_FUNC(svclamp, _f64, , )(op1, op2, op3);
 }
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c
index 7657165d8b3f6f..6f1231e776aa35 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c
@@ -1,8 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -DTEST_SME2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wno-unknown-attributes -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
index 8f08b32618b050..a3206029019c3d 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c
@@ -1,11 +1,8 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -DTEST_SME2 -target-feature +sve -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pfalse.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pfalse.c
index afdb038fb9312a..19993e5418128e 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pfalse.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pfalse.c
@@ -1,20 +1,14 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
-#ifndef TEST_SME2
-#define ATTR
-#else
-#define ATTR __arm_streaming
-#endif
-
 // CHECK-LABEL: @test_svpfalse_c(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> zeroinitializer)
@@ -25,7 +19,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> zeroinitializer)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svpfalse_c(void) ATTR
+svcount_t test_svpfalse_c(void) __arm_streaming_compatible
 {
   return svpfalse_c();
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c
index de3f6a9a57bfeb..73b7b0347dd970 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_psel.c
@@ -10,19 +10,9 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu \
 // RUN:   -target-feature +sve2p1 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \
-// RUN:   -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
-#ifndef TEST_SME2
-#define ATTR
-#else
-#define ATTR __arm_streaming_compatible
-#endif
-
 // CHECK-LABEL: @test_svpsel_lane_b8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IDX:%.*]], 15
@@ -35,7 +25,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv16i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 16 x i1> [[P2:%.*]], i32 [[ADD]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpsel_lane_b8(svbool_t p1, svbool_t p2, uint32_t idx) ATTR {
+svbool_t test_svpsel_lane_b8(svbool_t p1, svbool_t p2, uint32_t idx) __arm_streaming_compatible {
   return svpsel_lane_b8(p1, p2, idx + 15);
 }
 
@@ -53,7 +43,7 @@ svbool_t test_svpsel_lane_b8(svbool_t p1, svbool_t p2, uint32_t idx) ATTR {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv8i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 8 x i1> [[TMP0]], i32 [[ADD]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpsel_lane_b16(svbool_t p1, svbool_t p2, uint32_t idx) ATTR {
+svbool_t test_svpsel_lane_b16(svbool_t p1, svbool_t p2, uint32_t idx) __arm_streaming_compatible {
   return svpsel_lane_b16(p1, p2, idx + 7);
 }
 
@@ -71,7 +61,7 @@ svbool_t test_svpsel_lane_b16(svbool_t p1, svbool_t p2, uint32_t idx) ATTR {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv4i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 4 x i1> [[TMP0]], i32 [[ADD]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) ATTR {
+svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) __arm_streaming_compatible {
   return svpsel_lane_b32(p1, p2, idx + 3);
 }
 
@@ -89,7 +79,7 @@ svbool_t test_svpsel_lane_b32(svbool_t p1, svbool_t p2, uint32_t idx) ATTR {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.psel.nxv2i1(<vscale x 16 x i1> [[P1:%.*]], <vscale x 2 x i1> [[TMP0]], i32 [[ADD]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) ATTR {
+svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) __arm_streaming_compatible {
   return svpsel_lane_b64(p1, p2, idx + 1);
 }
 
@@ -109,7 +99,7 @@ svbool_t test_svpsel_lane_b64(svbool_t p1, svbool_t p2, uint32_t idx) ATTR {
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP2]]
 //
-svcount_t test_svpsel_lane_c8(svcount_t p1, svbool_t p2, uint32_t idx) ATTR {
+svcount_t test_svpsel_lane_c8(svcount_t p1, svbool_t p2, uint32_t idx) __arm_streaming_compatible {
   return svpsel_lane_c8(p1, p2, idx + 15);
 }
 
@@ -131,7 +121,7 @@ svcount_t test_svpsel_lane_c8(svcount_t p1, svbool_t p2, uint32_t idx) ATTR {
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
 //
-svcount_t test_svpsel_lane_c16(svcount_t p1, svbool_t p2, uint32_t idx) ATTR {
+svcount_t test_svpsel_lane_c16(svcount_t p1, svbool_t p2, uint32_t idx) __arm_streaming_compatible {
   return svpsel_lane_c16(p1, p2, idx + 7);
 }
 
@@ -153,7 +143,7 @@ svcount_t test_svpsel_lane_c16(svcount_t p1, svbool_t p2, uint32_t idx) ATTR {
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
 //
-svcount_t test_svpsel_lane_c32(svcount_t p1, svbool_t p2, uint32_t idx) ATTR {
+svcount_t test_svpsel_lane_c32(svcount_t p1, svbool_t p2, uint32_t idx) __arm_streaming_compatible {
   return svpsel_lane_c32(p1, p2, idx + 3);
 }
 
@@ -175,6 +165,6 @@ svcount_t test_svpsel_lane_c32(svcount_t p1, svbool_t p2, uint32_t idx) ATTR {
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.convert.from.svbool.taarch64.svcountt(<vscale x 16 x i1> [[TMP2]])
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP3]]
 //
-svcount_t test_svpsel_lane_c64(svcount_t p1, svbool_t p2, uint32_t idx) ATTR {
+svcount_t test_svpsel_lane_c64(svcount_t p1, svbool_t p2, uint32_t idx) __arm_streaming_compatible {
   return svpsel_lane_c64(p1, p2, idx + 1);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
index 04869fd550ec1f..8c63a7455c79f4 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_sclamp.c
@@ -10,21 +10,9 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 \
-// RUN:   -S -DTEST_SME2 -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 \
-// RUN:   -S -DTEST_SME2 -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 \
-// RUN:   -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
-#ifndef TEST_SME2
-#define ATTR
-#else
-#define ATTR __arm_streaming
-#endif
-
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -42,7 +30,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) ATTR  {
+svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) {
   return SVE_ACLE_FUNC(svclamp, _s8, , )(op1, op2, op3);
 }
 
@@ -56,7 +44,7 @@ svint8_t test_svclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) ATTR  {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sclamp.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) ATTR  {
+svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) {
   return SVE_ACLE_FUNC(svclamp, _s16, , )(op1, op2, op3);
 }
 
@@ -70,7 +58,7 @@ svint16_t test_svclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) ATTR  {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sclamp.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) ATTR  {
+svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) {
   return SVE_ACLE_FUNC(svclamp, _s32, , )(op1, op2, op3);
 }
 
@@ -84,7 +72,7 @@ svint32_t test_svclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) ATTR  {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sclamp.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svclamp_s64(svint64_t op1, svint64_t op2, svint64_t op3) ATTR  {
+svint64_t test_svclamp_s64(svint64_t op1, svint64_t op2, svint64_t op3) {
   return SVE_ACLE_FUNC(svclamp, _s64, , )(op1, op2, op3);
 }
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c
index b1ca27b7b68a12..0d8696a7634a70 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c
@@ -505,9 +505,11 @@ void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) ATTR
   return SVE_ACLE_FUNC(svstnt1,_f64_x4,,)(pn, base, v);
 }
 
+
 // == VNUM variants ==
 
 
+
 // CHECK-LABEL: @test_svstnt1_vnum_u8_x2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.vector.extract.nxv16i8.nxv32i8(<vscale x 32 x i8> [[V:%.*]], i64 0)
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c
index 37bfd4265a43ad..b8789862488777 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_uclamp.c
@@ -10,21 +10,9 @@
 // RUN:   -S -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sve2p1 \
 // RUN:   -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 \
-// RUN:   -S -DTEST_SME2 -Werror -emit-llvm -disable-O0-optnone -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 \
-// RUN:   -S -DTEST_SME2 -Werror -emit-llvm -disable-O0-optnone -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 \
-// RUN:   -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
-#ifndef TEST_SME2
-#define ATTR
-#else
-#define ATTR __arm_streaming
-#endif
-
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -42,7 +30,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uclamp.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) ATTR  {
+svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) {
   return SVE_ACLE_FUNC(svclamp, _u8, , )(op1, op2, op3);
 }
 
@@ -56,7 +44,7 @@ svuint8_t test_svclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) ATTR  {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uclamp.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) ATTR  {
+svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) {
   return SVE_ACLE_FUNC(svclamp, _u16, , )(op1, op2, op3);
 }
 
@@ -70,7 +58,7 @@ svuint16_t test_svclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uclamp.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) ATTR  {
+svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) {
   return SVE_ACLE_FUNC(svclamp, _u32, , )(op1, op2, op3);
 }
 
@@ -84,7 +72,7 @@ svuint32_t test_svclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uclamp.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svclamp_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3) ATTR  {
+svuint64_t test_svclamp_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3) {
   return SVE_ACLE_FUNC(svclamp, _u64, , )(op1, op2, op3);
 }
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c
index 11ebec9e7cbf11..143a43b4a92198 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c
@@ -1,10 +1,10 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -DTEST_SME2 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 // REQUIRES: aarch64-registered-target
 
@@ -16,11 +16,6 @@
 #define SVE_ACLE_FUNC(A1, A2) A1##A2
 #endif
 
-#ifdef TEST_SME2
-#define ATTR __arm_streaming
-#else
-#define ATTR
-#endif
 
 // WHILEGE
 
@@ -34,7 +29,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilege.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilege_c8_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilege_c8_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c8,_s64)(op1, op2, 2);
 }
@@ -49,7 +44,7 @@ svcount_t test_svwhilege_c8_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilege.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilege_c8_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilege_c8_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c8,_s64)(op1, op2, 4);
 }
@@ -64,7 +59,7 @@ svcount_t test_svwhilege_c8_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilege.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilege_c16_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilege_c16_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c16,_s64)(op1, op2, 2);
 }
@@ -79,7 +74,7 @@ svcount_t test_svwhilege_c16_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilege.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilege_c16_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilege_c16_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c16,_s64)(op1, op2, 4);
 }
@@ -94,7 +89,7 @@ svcount_t test_svwhilege_c16_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilege.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilege_c32_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilege_c32_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c32,_s64)(op1, op2, 2);
 }
@@ -109,7 +104,7 @@ svcount_t test_svwhilege_c32_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilege.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilege_c32_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilege_c32_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c32,_s64)(op1, op2, 4);
 }
@@ -124,7 +119,7 @@ svcount_t test_svwhilege_c32_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilege.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilege_c64_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilege_c64_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c64,_s64)(op1, op2, 2);
 }
@@ -139,7 +134,7 @@ svcount_t test_svwhilege_c64_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilege.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilege_c64_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilege_c64_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c64,_s64)(op1, op2, 4);
 }
@@ -157,7 +152,7 @@ svcount_t test_svwhilege_c64_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilegt.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilegt_c8_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilegt_c8_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c8,_s64)(op1, op2, 2);
 }
@@ -172,7 +167,7 @@ svcount_t test_svwhilegt_c8_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilegt.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilegt_c8_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilegt_c8_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c8,_s64)(op1, op2, 4);
 }
@@ -187,7 +182,7 @@ svcount_t test_svwhilegt_c8_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilegt.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilegt_c16_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilegt_c16_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c16,_s64)(op1, op2, 2);
 }
@@ -202,7 +197,7 @@ svcount_t test_svwhilegt_c16_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilegt.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilegt_c16_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilegt_c16_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c16,_s64)(op1, op2, 4);
 }
@@ -217,7 +212,7 @@ svcount_t test_svwhilegt_c16_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilegt.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilegt_c32_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilegt_c32_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c32,_s64)(op1, op2, 2);
 }
@@ -232,7 +227,7 @@ svcount_t test_svwhilegt_c32_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilegt.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilegt_c32_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilegt_c32_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c32,_s64)(op1, op2, 4);
 }
@@ -247,7 +242,7 @@ svcount_t test_svwhilegt_c32_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilegt.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilegt_c64_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilegt_c64_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c64,_s64)(op1, op2, 2);
 }
@@ -262,7 +257,7 @@ svcount_t test_svwhilegt_c64_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilegt.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilegt_c64_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilegt_c64_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c64,_s64)(op1, op2, 4);
 }
@@ -280,7 +275,7 @@ svcount_t test_svwhilegt_c64_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehi.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehi_c8_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehi_c8_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c8,_u64)(op1, op2, 2);
 }
@@ -295,7 +290,7 @@ svcount_t test_svwhilehi_c8_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehi.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehi_c8_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehi_c8_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c8,_u64)(op1, op2, 4);
 }
@@ -310,7 +305,7 @@ svcount_t test_svwhilehi_c8_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehi.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehi_c16_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehi_c16_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c16,_u64)(op1, op2, 2);
 }
@@ -325,7 +320,7 @@ svcount_t test_svwhilehi_c16_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehi.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehi_c16_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehi_c16_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c16,_u64)(op1, op2, 4);
 }
@@ -340,7 +335,7 @@ svcount_t test_svwhilehi_c16_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehi.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehi_c32_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehi_c32_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c32,_u64)(op1, op2, 2);
 }
@@ -355,7 +350,7 @@ svcount_t test_svwhilehi_c32_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehi.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehi_c32_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehi_c32_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c32,_u64)(op1, op2, 4);
 }
@@ -370,7 +365,7 @@ svcount_t test_svwhilehi_c32_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehi.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehi_c64_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehi_c64_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c64,_u64)(op1, op2, 2);
 }
@@ -385,7 +380,7 @@ svcount_t test_svwhilehi_c64_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehi.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehi_c64_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehi_c64_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilegt_c64,_u64)(op1, op2, 4);
 }
@@ -403,7 +398,7 @@ svcount_t test_svwhilehi_c64_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehs.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehs_c8_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehs_c8_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c8,_u64)(op1, op2, 2);
 }
@@ -418,7 +413,7 @@ svcount_t test_svwhilehs_c8_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehs.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehs_c8_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehs_c8_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c8,_u64)(op1, op2, 4);
 }
@@ -433,7 +428,7 @@ svcount_t test_svwhilehs_c8_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehs.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehs_c16_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehs_c16_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c16,_u64)(op1, op2, 2);
 }
@@ -448,7 +443,7 @@ svcount_t test_svwhilehs_c16_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehs.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehs_c16_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehs_c16_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c16,_u64)(op1, op2, 4);
 }
@@ -463,7 +458,7 @@ svcount_t test_svwhilehs_c16_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehs.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehs_c32_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehs_c32_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c32,_u64)(op1, op2, 2);
 }
@@ -478,7 +473,7 @@ svcount_t test_svwhilehs_c32_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehs.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehs_c32_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehs_c32_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c32,_u64)(op1, op2, 4);
 }
@@ -493,7 +488,7 @@ svcount_t test_svwhilehs_c32_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehs.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehs_c64_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehs_c64_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c64,_u64)(op1, op2, 2);
 }
@@ -508,7 +503,7 @@ svcount_t test_svwhilehs_c64_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilehs.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilehs_c64_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilehs_c64_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilege_c64,_u64)(op1, op2, 4);
 }
@@ -526,7 +521,7 @@ svcount_t test_svwhilehs_c64_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilele.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilele_c8_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilele_c8_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c8,_s64)(op1, op2, 2);
 }
@@ -541,7 +536,7 @@ svcount_t test_svwhilele_c8_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilele.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilele_c8_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilele_c8_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c8,_s64)(op1, op2, 4);
 }
@@ -556,7 +551,7 @@ svcount_t test_svwhilele_c8_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilele.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilele_c16_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilele_c16_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c16,_s64)(op1, op2, 2);
 }
@@ -571,7 +566,7 @@ svcount_t test_svwhilele_c16_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilele.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilele_c16_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilele_c16_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c16,_s64)(op1, op2, 4);
 }
@@ -586,7 +581,7 @@ svcount_t test_svwhilele_c16_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilele.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilele_c32_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilele_c32_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c32,_s64)(op1, op2, 2);
 }
@@ -601,7 +596,7 @@ svcount_t test_svwhilele_c32_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilele.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilele_c32_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilele_c32_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c32,_s64)(op1, op2, 4);
 }
@@ -616,7 +611,7 @@ svcount_t test_svwhilele_c32_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilele.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilele_c64_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilele_c64_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c64,_s64)(op1, op2, 2);
 }
@@ -631,7 +626,7 @@ svcount_t test_svwhilele_c64_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilele.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilele_c64_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilele_c64_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c64,_s64)(op1, op2, 4);
 }
@@ -649,7 +644,7 @@ svcount_t test_svwhilele_c64_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelo_c8_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilelo_c8_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c8,_u64)(op1, op2, 2);
 }
@@ -664,7 +659,7 @@ svcount_t test_svwhilelo_c8_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelo_c8_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilelo_c8_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c8,_u64)(op1, op2, 4);
 }
@@ -679,7 +674,7 @@ svcount_t test_svwhilelo_c8_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelo_c16_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilelo_c16_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c16,_u64)(op1, op2, 2);
 }
@@ -694,7 +689,7 @@ svcount_t test_svwhilelo_c16_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelo_c16_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilelo_c16_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c16,_u64)(op1, op2, 4);
 }
@@ -709,7 +704,7 @@ svcount_t test_svwhilelo_c16_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelo_c32_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilelo_c32_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c32,_u64)(op1, op2, 2);
 }
@@ -724,7 +719,7 @@ svcount_t test_svwhilelo_c32_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelo_c32_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilelo_c32_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c32,_u64)(op1, op2, 4);
 }
@@ -739,7 +734,7 @@ svcount_t test_svwhilelo_c32_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelo_c64_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilelo_c64_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c64,_u64)(op1, op2, 2);
 }
@@ -754,7 +749,7 @@ svcount_t test_svwhilelo_c64_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelo_c64_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilelo_c64_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c64,_u64)(op1, op2, 4);
 }
@@ -772,7 +767,7 @@ svcount_t test_svwhilelo_c64_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilels.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilels_c8_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilels_c8_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c8,_u64)(op1, op2, 2);
 }
@@ -787,7 +782,7 @@ svcount_t test_svwhilels_c8_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilels.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilels_c8_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilels_c8_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c8,_u64)(op1, op2, 4);
 }
@@ -802,7 +797,7 @@ svcount_t test_svwhilels_c8_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilels.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilels_c16_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilels_c16_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c16,_u64)(op1, op2, 2);
 }
@@ -817,7 +812,7 @@ svcount_t test_svwhilels_c16_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilels.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilels_c16_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilels_c16_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c16,_u64)(op1, op2, 4);
 }
@@ -832,7 +827,7 @@ svcount_t test_svwhilels_c16_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilels.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilels_c32_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilels_c32_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c32,_u64)(op1, op2, 2);
 }
@@ -847,7 +842,7 @@ svcount_t test_svwhilels_c32_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilels.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilels_c32_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilels_c32_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c32,_u64)(op1, op2, 4);
 }
@@ -862,7 +857,7 @@ svcount_t test_svwhilels_c32_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilels.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilels_c64_vl2(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilels_c64_vl2(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c64,_u64)(op1, op2, 2);
 }
@@ -877,7 +872,7 @@ svcount_t test_svwhilels_c64_vl2(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilels.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilels_c64_vl4(uint64_t op1, uint64_t op2) ATTR
+svcount_t test_svwhilels_c64_vl4(uint64_t op1, uint64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilele_c64,_u64)(op1, op2, 4);
 }
@@ -895,7 +890,7 @@ svcount_t test_svwhilels_c64_vl4(uint64_t op1, uint64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelt_c8_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilelt_c8_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c8,_s64)(op1, op2, 2);
 }
@@ -910,7 +905,7 @@ svcount_t test_svwhilelt_c8_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c8(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelt_c8_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilelt_c8_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c8,_s64)(op1, op2, 4);
 }
@@ -925,7 +920,7 @@ svcount_t test_svwhilelt_c8_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelt_c16_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilelt_c16_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c16,_s64)(op1, op2, 2);
 }
@@ -940,7 +935,7 @@ svcount_t test_svwhilelt_c16_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c16(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelt_c16_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilelt_c16_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c16,_s64)(op1, op2, 4);
 }
@@ -955,7 +950,7 @@ svcount_t test_svwhilelt_c16_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelt_c32_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilelt_c32_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c32,_s64)(op1, op2, 2);
 }
@@ -970,7 +965,7 @@ svcount_t test_svwhilelt_c32_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c32(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelt_c32_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilelt_c32_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c32,_s64)(op1, op2, 4);
 }
@@ -985,7 +980,7 @@ svcount_t test_svwhilelt_c32_vl4(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelt_c64_vl2(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilelt_c64_vl2(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c64,_s64)(op1, op2, 2);
 }
@@ -1000,7 +995,7 @@ svcount_t test_svwhilelt_c64_vl2(int64_t op1, int64_t op2) ATTR
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call target("aarch64.svcount") @llvm.aarch64.sve.whilelt.c64(i64 [[OP1:%.*]], i64 [[OP2:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret target("aarch64.svcount") [[TMP0]]
 //
-svcount_t test_svwhilelt_c64_vl4(int64_t op1, int64_t op2) ATTR
+svcount_t test_svwhilelt_c64_vl4(int64_t op1, int64_t op2)
 {
   return SVE_ACLE_FUNC(svwhilelt_c64,_s64)(op1, op2, 4);
 }
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
index 5118f743174c25..6a6370bf99b108 100644
--- a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_imm.cpp
@@ -237,7 +237,7 @@ void test_svluti4_lane_zt_x2(svuint8_t zn_u8) __arm_streaming __arm_shared_za __
   svluti4_lane_zt_f32_x2(0, zn_u8, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
 }
 
-void test_bfmlslb_bad_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming {
+void test_bfmlslb_bad_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm) __arm_streaming_compatible {
   svbfmlslb_lane_f32(zda, zn, zm, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
   svbfmlslt_lane_f32(zda, zn, zm, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
 }
diff --git a/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c b/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c
deleted file mode 100644
index 4debc14190aa8a..00000000000000
--- a/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c
+++ /dev/null
@@ -1,37 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s
-
-// REQUIRES: aarch64-registered-target
-#include "arm_sve.h"
-
-//svldnt1:
-
-__attribute__((target("+sme2")))
-svuint8x2_t sme2_or_sve2p1_intrinsic_test_sme2_invalid(svcount_t png, const uint8_t *rn) {
-  // expected-warning at +1 {{builtin call has undefined behaviour when called from a non-streaming function}}
-  return svldnt1_u8_x2(png, rn);
-}
-
-__attribute__((target("+sme2")))
-svint16x4_t sme2_or_sve2p1_intrinsic_test_sme2(svcount_t png, const int16_t *rn) __arm_streaming {
-  // expected-no-warning
-  return svldnt1_s16_x4(png, rn);
-}
-
-__attribute__((target("+sve2p1")))
-svuint32x2_t sme2_or_sve2p1_intrinsic_test_sve2p1(svcount_t png, const uint32_t *rn) {
-  // expected-no-warning
-  return svldnt1_u32_x2(png, rn);
-}
-
-__attribute__((target("+sme2,+sve2p1")))
-svint64x4_t sme2_or_sve2p1_intrinsic_test_both_arm_streaming(svcount_t png, const int64_t *rn) __arm_streaming {
-  // expected-no-warning
-  return svldnt1_s64_x4(png, rn);
-}
-
-__attribute__((target("+sme2,+sve2p1")))
-svint64x4_t sme2_or_sve2p1_intrinsic_test_both_no_arm_streaming(svcount_t png, const int64_t *rn) {
-  // expected-no-warning
-  return svldnt1_s64_x4(png, rn);
-}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 5de2223e71b046..6c302da106a2cf 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1773,14 +1773,11 @@ void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
   llvm::StringMap<std::set<std::string>> StreamingMap;
 
   uint64_t IsStreamingFlag = getEnumValueForFlag("IsStreaming");
-  uint64_t IsStreamingOrSVE2p1Flag = getEnumValueForFlag("IsStreamingOrSVE2p1");
   uint64_t IsStreamingCompatibleFlag =
       getEnumValueForFlag("IsStreamingCompatible");
   for (auto &Def : Defs) {
     if (Def->isFlagSet(IsStreamingFlag))
       StreamingMap["ArmStreaming"].insert(Def->getMangledName());
-    else if (Def->isFlagSet(IsStreamingOrSVE2p1Flag))
-      StreamingMap["ArmStreamingOrSVE2p1"].insert(Def->getMangledName());
     else if (Def->isFlagSet(IsStreamingCompatibleFlag))
       StreamingMap["ArmStreamingCompatible"].insert(Def->getMangledName());
     else