[clang] [Clang][AArch64] Expose compatible SVE intrinsics with only +sme (PR #95787)

Sander de Smalen via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 17 06:50:26 PDT 2024


https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/95787

This allows code with SVE intrinsics to be compiled with +sme,+nosve,
provided the enclosing function is in the correct mode (see #93802).
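
For illustration, a minimal sketch of the kind of code this enables (the
driver flags and function name are illustrative assumptions; the patch's
tests exercise this via cc1 with -target-feature +sme):

  // example.c -- built for a target with SME but without SVE, e.g.
  //   clang --target=aarch64-linux-gnu -march=armv9-a+sme+nosve -c example.c
  #include <arm_sve.h>

  // SVE intrinsics carrying VerifyRuntimeMode in arm_sve.td are accepted
  // here because the enclosing function is a streaming function.
  svint8_t abs_diff(svbool_t pg, svint8_t a, svint8_t b) __arm_streaming {
    return svabd_s8_z(pg, a, b);
  }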

From b6c58b2488fe289c85799e3fa6aed9a092002cf8 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 17 Jun 2024 11:06:31 +0100
Subject: [PATCH 1/2] [Clang][AArch64] Expose compatible SVE intrinsics with
 only +sme

This allows code with SVE intrinsics to be compiled with +sme,+nosve,
provided the enclosing function is in the correct mode (see #93802).
---
 clang/include/clang/Basic/arm_sve.td          | 118 +++++++++++-------
 clang/include/clang/Basic/arm_sve_sme_incl.td |   2 +-
 clang/utils/TableGen/SveEmitter.cpp           |   5 +
 3 files changed, 78 insertions(+), 47 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index f5972b41e7b50..f7d64d0d35d35 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -41,6 +41,7 @@ def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl",          [IsLoad,
 def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl",             [IsLoad, VerifyRuntimeMode],               MemEltTyInt32,   "aarch64_sve_ld1">;
 def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl",             [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt32,   "aarch64_sve_ld1">;
 
+let TargetGuard = "sve" in {
 // Load one vector (vector base)
 def SVLD1_GATHER_BASES_U   : MInst<"svld1_gather[_{2}base]_{d}",   "dPu", "ilUiUlfd", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ld1_gather_scalar_offset">;
 def SVLD1SB_GATHER_BASES_U : MInst<"svld1sb_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad],               MemEltTyInt8,    "aarch64_sve_ld1_gather_scalar_offset">;
@@ -136,12 +137,14 @@ def SVLDFF1SH_VNUM : MInst<"svldff1sh_vnum_{d}", "dPTl", "ilUiUl",          [IsL
 def SVLDFF1UH_VNUM : MInst<"svldff1uh_vnum_{d}", "dPXl", "ilUiUl",          [IsLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldff1">;
 def SVLDFF1SW_VNUM : MInst<"svldff1sw_vnum_{d}", "dPUl", "lUl",             [IsLoad],               MemEltTyInt32,   "aarch64_sve_ldff1">;
 def SVLDFF1UW_VNUM : MInst<"svldff1uw_vnum_{d}", "dPYl", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldff1">;
+} // let TargetGuard = "sve"
 
 let TargetGuard = "sve,bf16" in {
   def SVLDFF1_BF      : MInst<"svldff1[_{2}]",      "dPc",  "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
   def SVLDFF1_VNUM_BF : MInst<"svldff1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldff1">;
 }
 
+let TargetGuard = "sve" in {
 // First-faulting load one vector (vector base)
 def SVLDFF1_GATHER_BASES_U   : MInst<"svldff1_gather[_{2}base]_{d}",   "dPu", "ilUiUlfd", [IsGatherLoad],               MemEltTyDefault, "aarch64_sve_ldff1_gather_scalar_offset">;
 def SVLDFF1SB_GATHER_BASES_U : MInst<"svldff1sb_gather[_{2}base]_{d}", "dPu", "ilUiUl",   [IsGatherLoad],               MemEltTyInt8,    "aarch64_sve_ldff1_gather_scalar_offset">;
@@ -236,6 +239,7 @@ def SVLDNF1SH_VNUM : MInst<"svldnf1sh_vnum_{d}", "dPTl", "ilUiUl",          [IsL
 def SVLDNF1UH_VNUM : MInst<"svldnf1uh_vnum_{d}", "dPXl", "ilUiUl",          [IsLoad, IsZExtReturn], MemEltTyInt16,   "aarch64_sve_ldnf1">;
 def SVLDNF1SW_VNUM : MInst<"svldnf1sw_vnum_{d}", "dPUl", "lUl",             [IsLoad],               MemEltTyInt32,   "aarch64_sve_ldnf1">;
 def SVLDNF1UW_VNUM : MInst<"svldnf1uw_vnum_{d}", "dPYl", "lUl",             [IsLoad, IsZExtReturn], MemEltTyInt32,   "aarch64_sve_ldnf1">;
+} // let TargetGuard = "sve"
 
 let TargetGuard = "sve,bf16" in {
   def SVLDNF1_BF      : MInst<"svldnf1[_{2}]",      "dPc",  "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnf1">;
@@ -286,10 +290,13 @@ let TargetGuard = "sve,f64mm,bf16" in {
 }
 
 let TargetGuard = "sve,bf16" in {
+  def SVBFMMLA       : SInst<"svbfmmla[_{0}]",       "MMdd",  "b", MergeNone, "aarch64_sve_bfmmla",       [IsOverloadNone]>;
+}
+
+let TargetGuard = "(sve,bf16)|sme" in {
   def SVBFDOT        : SInst<"svbfdot[_{0}]",        "MMdd",  "b", MergeNone, "aarch64_sve_bfdot",        [IsOverloadNone, VerifyRuntimeMode]>;
   def SVBFMLALB      : SInst<"svbfmlalb[_{0}]",      "MMdd",  "b", MergeNone, "aarch64_sve_bfmlalb",      [IsOverloadNone, VerifyRuntimeMode]>;
   def SVBFMLALT      : SInst<"svbfmlalt[_{0}]",      "MMdd",  "b", MergeNone, "aarch64_sve_bfmlalt",      [IsOverloadNone, VerifyRuntimeMode]>;
-  def SVBFMMLA       : SInst<"svbfmmla[_{0}]",       "MMdd",  "b", MergeNone, "aarch64_sve_bfmmla",       [IsOverloadNone, VerifyRuntimeMode]>;
   def SVBFDOT_N      : SInst<"svbfdot[_n_{0}]",      "MMda",  "b", MergeNone, "aarch64_sve_bfdot",        [IsOverloadNone, VerifyRuntimeMode]>;
   def SVBFMLAL_N     : SInst<"svbfmlalb[_n_{0}]",    "MMda",  "b", MergeNone, "aarch64_sve_bfmlalb",      [IsOverloadNone, VerifyRuntimeMode]>;
   def SVBFMLALT_N    : SInst<"svbfmlalt[_n_{0}]",    "MMda",  "b", MergeNone, "aarch64_sve_bfmlalt",      [IsOverloadNone, VerifyRuntimeMode]>;
@@ -356,6 +363,7 @@ let TargetGuard = "(sve,bf16)|sme" in {
   def SVST1_VNUM_BF : MInst<"svst1_vnum[_{d}]", "vPpld", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">;
 }
 
+let TargetGuard = "sve" in {
 // Store one vector (vector base)
 def SVST1_SCATTER_BASES_U     : MInst<"svst1_scatter[_{2}base_{d}]",  "vPud",  "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1_scatter_scalar_offset">;
 def SVST1B_SCATTER_BASES_U    : MInst<"svst1b_scatter[_{2}base_{d}]", "vPud",  "ilUiUl",   [IsScatterStore], MemEltTyInt8,    "aarch64_sve_st1_scatter_scalar_offset">;
@@ -424,10 +432,11 @@ def SVST1H_SCATTER_32B_INDICES_UU : MInst<"svst1h_scatter_[{3}]index[_{d}]", "vP
 def SVST1_SCATTER_INDEX_S     : MInst<"svst1_scatter[_{2}base]_index[_{d}]",  "vPuld", "ilUiUlfd", [IsScatterStore], MemEltTyDefault, "aarch64_sve_st1_scatter_scalar_offset">;
 def SVST1H_SCATTER_INDEX_S    : MInst<"svst1h_scatter[_{2}base]_index[_{d}]", "vPuld", "ilUiUl",   [IsScatterStore], MemEltTyInt16,   "aarch64_sve_st1_scatter_scalar_offset">;
 def SVST1W_SCATTER_INDEX_S    : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl",      [IsScatterStore], MemEltTyInt32,   "aarch64_sve_st1_scatter_scalar_offset">;
+} // let TargetGuard = "sve"
 
 multiclass StructStore<string name, string proto, string i> {
   def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
-  let TargetGuard = "sve,bf16" in {
+  let TargetGuard = "(sve,bf16)|sme" in {
     def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
   }
 }
@@ -499,6 +508,7 @@ def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPQlJ", "s", [IsPrefetch, VerifyRuntimeM
 def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPQlJ", "i", [IsPrefetch, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_prf">;
 def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPQlJ", "l", [IsPrefetch, VerifyRuntimeMode], MemEltTyInt64, "aarch64_sve_prf">;
 
+let TargetGuard = "sve" in {
 // Prefetch (Vector bases)
 def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8,  "aarch64_sve_prfb_gather_scalar_offset">;
 def SVPRFH_GATHER_BASES : MInst<"svprfh_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">;
@@ -531,14 +541,17 @@ def SVPRFB_GATHER_BASES_OFFSET : MInst<"svprfb_gather[_{2}base]_offset", "vPdlJ"
 def SVPRFH_GATHER_BASES_OFFSET : MInst<"svprfh_gather[_{2}base]_index",  "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt16, "aarch64_sve_prfh_gather_scalar_offset">;
 def SVPRFW_GATHER_BASES_OFFSET : MInst<"svprfw_gather[_{2}base]_index",  "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt32, "aarch64_sve_prfw_gather_scalar_offset">;
 def SVPRFD_GATHER_BASES_OFFSET : MInst<"svprfd_gather[_{2}base]_index",  "vPdlJ", "UiUl", [IsGatherPrefetch], MemEltTyInt64, "aarch64_sve_prfd_gather_scalar_offset">;
+} // let TargetGuard = "sve"
 
 ////////////////////////////////////////////////////////////////////////////////
 // Address calculations
 
+let TargetGuard = "sve" in {
 def SVADRB : SInst<"svadrb[_{0}base]_[{2}]offset", "uud", "ilUiUl", MergeNone, "aarch64_sve_adrb">;
 def SVADRH : SInst<"svadrh[_{0}base]_[{2}]index",  "uud", "ilUiUl", MergeNone, "aarch64_sve_adrh">;
 def SVADRW : SInst<"svadrw[_{0}base]_[{2}]index",  "uud", "ilUiUl", MergeNone, "aarch64_sve_adrw">;
 def SVADRD : SInst<"svadrd[_{0}base]_[{2}]index",  "uud", "ilUiUl", MergeNone, "aarch64_sve_adrd">;
+} // let TargetGuard = "sve"
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar to vector
@@ -826,10 +839,12 @@ defm SVRINTX : SInstZPZ<"svrintx", "hfd", "aarch64_sve_frintx">;
 defm SVRINTZ : SInstZPZ<"svrintz", "hfd", "aarch64_sve_frintz">;
 defm SVSQRT  : SInstZPZ<"svsqrt",  "hfd", "aarch64_sve_fsqrt">;
 
+let TargetGuard = "sve" in {
 def SVEXPA  : SInst<"svexpa[_{d}]",  "du",   "hfd", MergeNone, "aarch64_sve_fexpa_x">;
 def SVTMAD  : SInst<"svtmad[_{d}]",  "dddi", "hfd", MergeNone, "aarch64_sve_ftmad_x", [], [ImmCheck<2, ImmCheck0_7>]>;
 def SVTSMUL : SInst<"svtsmul[_{d}]", "ddu",  "hfd", MergeNone, "aarch64_sve_ftsmul_x">;
 def SVTSSEL : SInst<"svtssel[_{d}]", "ddu",  "hfd", MergeNone, "aarch64_sve_ftssel_x">;
+}
 
 def SVSCALE_M   : SInst<"svscale[_{d}]",   "dPdx", "hfd", MergeOp1,  "aarch64_sve_fscale", [VerifyRuntimeMode]>;
 def SVSCALE_X   : SInst<"svscale[_{d}]",   "dPdx", "hfd", MergeAny,  "aarch64_sve_fscale", [VerifyRuntimeMode]>;
@@ -992,7 +1007,7 @@ defm SVFCVT_F32_F64   : SInstCvtMXZ<"svcvt_f32[_f64]", "MMPd", "MPd", "d", "aarc
 defm SVFCVT_F64_F16   : SInstCvtMXZ<"svcvt_f64[_f16]", "ddPO", "dPO", "d", "aarch64_sve_fcvt_f64f16">;
 defm SVFCVT_F64_F32   : SInstCvtMXZ<"svcvt_f64[_f32]", "ddPM", "dPM", "d", "aarch64_sve_fcvt_f64f32">;
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 defm SVCVTLT_F32    : SInstCvtMX<"svcvtlt_f32[_f16]",  "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
 defm SVCVTLT_F64    : SInstCvtMX<"svcvtlt_f64[_f32]",  "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
 
@@ -1012,7 +1027,7 @@ def SVCVTXNT_F32    : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch6
 
 multiclass SVEPerm<string name, string proto, string i> {
   def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i, [VerifyRuntimeMode]>;
-  let TargetGuard = "sve,bf16" in {
+  let TargetGuard = "(sve,bf16)|sme" in {
     def: SInst<name, proto, "b", MergeNone, i, [VerifyRuntimeMode]>;
   }
 }
@@ -1022,19 +1037,22 @@ defm SVCLASTA_N  : SVEPerm<"svclasta[_n_{d}]", "sPsd", "aarch64_sve_clasta_n">;
 defm SVCLASTB    : SVEPerm<"svclastb[_{d}]",   "dPdd", "aarch64_sve_clastb">;
 defm SVCLASTB_N  : SVEPerm<"svclastb[_n_{d}]", "sPsd", "aarch64_sve_clastb_n">;
 
+let TargetGuard = "sve" in {
 def SVCOMPACT    : SInst<"svcompact[_{d}]",   "dPd",  "ilUiUlfd",        MergeNone, "aarch64_sve_compact">;
+}
+
 // Note: svdup_lane is implemented using the intrinsic for TBL to represent a
 // splat of any possible lane. It is up to LLVM to pick a more efficient
 // instruction such as DUP (indexed) if the lane index fits the range of the
 // instruction's immediate.
 def SVDUP_LANE   : SInst<"svdup_lane[_{d}]",  "ddL",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
-let TargetGuard = "sve,bf16" in {
+let TargetGuard = "(sve,bf16)|sme" in {
 def SVDUP_LANE_BF16 :
                    SInst<"svdup_lane[_{d}]",  "ddL",  "b",               MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
 }
 
 def SVDUPQ_LANE  : SInst<"svdupq_lane[_{d}]", "ddn",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>;
-let TargetGuard = "sve,bf16" in {
+let TargetGuard = "(sve,bf16)|sme" in {
   def SVDUPQ_LANE_BF16  : SInst<"svdupq_lane[_{d}]", "ddn",  "b", MergeNone, "aarch64_sve_dupq_lane", [VerifyRuntimeMode]>;
 }
 def SVEXT        : SInst<"svext[_{d}]",       "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [VerifyRuntimeMode], [ImmCheck<2, ImmCheckExtract, 1>]>;
@@ -1045,7 +1063,7 @@ def SVSEL        : SInst<"svsel[_{d}]",       "dPdd", "csilUcUsUiUlhfd", MergeNo
 def SVSPLICE     : SInst<"svsplice[_{d}]",    "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice", [VerifyRuntimeMode]>;
 def SVTBL        : SInst<"svtbl[_{d}]",       "ddu",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
 
-let TargetGuard = "sve,bf16" in {
+let TargetGuard = "(sve,bf16)|sme" in {
   def SVTBL_BF16 : SInst<"svtbl[_{d}]",       "ddu",  "b",               MergeNone, "aarch64_sve_tbl", [VerifyRuntimeMode]>;
 }
 
@@ -1158,10 +1176,12 @@ def SVPTEST_LAST  : SInst<"svptest_last",  "sPP", "Pc", MergeNone, "aarch64_sve_
 ////////////////////////////////////////////////////////////////////////////////
 // FFR manipulation
 
+let TargetGuard = "sve" in {
 def SVRDFFR   : SInst<"svrdffr",   "Pv",  "Pc", MergeNone, "", [IsOverloadNone]>;
 def SVRDFFR_Z : SInst<"svrdffr_z", "PP", "Pc", MergeNone, "", [IsOverloadNone]>;
 def SVSETFFR  : SInst<"svsetffr",  "vv",  "",   MergeNone, "", [IsOverloadNone]>;
 def SVWRFFR   : SInst<"svwrffr",   "vP", "Pc", MergeNone, "", [IsOverloadNone]>;
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // Counting elements
@@ -1179,7 +1199,7 @@ def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendS
 def SVCNTP : SInst<"svcntp_{d}",  "nPP", "PcPsPiPl",        MergeNone, "aarch64_sve_cntp", [VerifyRuntimeMode]>;
 def SVLEN  : SInst<"svlen[_{d}]", "nd",  "csilUcUsUiUlhfd", MergeNone, "", [VerifyRuntimeMode]>;
 
-let TargetGuard = "sve,bf16" in {
+let TargetGuard = "(sve,bf16)|sme" in {
 def SVLEN_BF16 : SInst<"svlen[_{d}]", "nd", "b", MergeNone, "", [VerifyRuntimeMode]>;
 }
 
@@ -1249,7 +1269,9 @@ let TargetGuard = "sve,i8mm" in {
 def SVMLLA_S32   : SInst<"svmmla[_s32]",   "ddqq","i",  MergeNone, "aarch64_sve_smmla">;
 def SVMLLA_U32   : SInst<"svmmla[_u32]",   "ddqq","Ui", MergeNone, "aarch64_sve_ummla">;
 def SVUSMLLA_S32 : SInst<"svusmmla[_s32]", "ddbq","i",  MergeNone, "aarch64_sve_usmmla">;
+}
 
+let TargetGuard = "(sve|sme),i8mm" in {
 def SVUSDOT_S    : SInst<"svusdot[_s32]",    "ddbq", "i",       MergeNone, "aarch64_sve_usdot", [VerifyRuntimeMode]>;
 def SVUSDOT_N_S  : SInst<"svusdot[_n_s32]",  "ddbr", "i",       MergeNone, "aarch64_sve_usdot", [VerifyRuntimeMode]>;
 def SVSUDOT_S    : SInst<"svsudot[_s32]",    "ddqb", "i",       MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, VerifyRuntimeMode]>;
@@ -1265,21 +1287,21 @@ def SVMLLA_F32 : SInst<"svmmla[_f32]", "dddd","f", MergeNone, "aarch64_sve_fmmla
 
 let TargetGuard = "sve,f64mm" in {
 def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd","d", MergeNone, "aarch64_sve_fmmla">;
-def SVTRN1Q      : SInst<"svtrn1q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q", [VerifyRuntimeMode]>;
-def SVTRN2Q      : SInst<"svtrn2q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q", [VerifyRuntimeMode]>;
-def SVUZP1Q      : SInst<"svuzp1q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q", [VerifyRuntimeMode]>;
-def SVUZP2Q      : SInst<"svuzp2q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q", [VerifyRuntimeMode]>;
-def SVZIP1Q      : SInst<"svzip1q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q", [VerifyRuntimeMode]>;
-def SVZIP2Q      : SInst<"svzip2q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q", [VerifyRuntimeMode]>;
+def SVTRN1Q      : SInst<"svtrn1q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q">;
+def SVTRN2Q      : SInst<"svtrn2q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q">;
+def SVUZP1Q      : SInst<"svuzp1q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q">;
+def SVUZP2Q      : SInst<"svuzp2q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q">;
+def SVZIP1Q      : SInst<"svzip1q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q">;
+def SVZIP2Q      : SInst<"svzip2q[_{d}]",     "ddd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q">;
 }
 
 let TargetGuard = "sve,bf16,f64mm" in {
 def SVTRN1Q_BF16      : SInst<"svtrn1q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_trn1q">;
 def SVTRN2Q_BF16      : SInst<"svtrn2q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_trn2q">;
-def SVUZP1Q_BF16      : SInst<"svuzp1q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_uzp1q", [VerifyRuntimeMode]>;
-def SVUZP2Q_BF16      : SInst<"svuzp2q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_uzp2q", [VerifyRuntimeMode]>;
-def SVZIP1Q_BF16      : SInst<"svzip1q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_zip1q", [VerifyRuntimeMode]>;
-def SVZIP2Q_BF16      : SInst<"svzip2q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_zip2q", [VerifyRuntimeMode]>;
+def SVUZP1Q_BF16      : SInst<"svuzp1q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_uzp1q">;
+def SVUZP2Q_BF16      : SInst<"svuzp2q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_uzp2q">;
+def SVZIP1Q_BF16      : SInst<"svzip1q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_zip1q">;
+def SVZIP2Q_BF16      : SInst<"svzip2q[_{d}]",     "ddd",  "b", MergeNone, "aarch64_sve_zip2q">;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -1347,7 +1369,7 @@ let TargetGuard = "sve2p1|sme2" in {
 }
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 WhileGE/GT
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl",     MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
 def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl",     MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
 def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl",     MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
@@ -1373,7 +1395,7 @@ let TargetGuard = "sve2p1|sme2"  in {
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Uniform DSP operations
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 defm SVQADD_S  : SInstZPZZ<"svqadd",  "csli",     "aarch64_sve_sqadd",  "aarch64_sve_sqadd">;
 defm SVQADD_U  : SInstZPZZ<"svqadd",  "UcUsUiUl", "aarch64_sve_uqadd",  "aarch64_sve_uqadd">;
 defm SVHADD_S  : SInstZPZZ<"svhadd",  "csli",     "aarch64_sve_shadd",  "aarch64_sve_shadd">;
@@ -1408,7 +1430,7 @@ multiclass SInstZPZxZ<string name, string types, string pat_v, string pat_n, str
   def _N_Z : SInst<name # "[_n_{d}]", pat_n, types, MergeZero, intrinsic, flags>;
 }
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil",     "dPdx", "dPdK", "aarch64_sve_sqrshl", [VerifyRuntimeMode]>;
 defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl", [VerifyRuntimeMode]>;
 defm SVQSHL_S  : SInstZPZxZ<"svqshl",  "csil",     "dPdx", "dPdK", "aarch64_sve_sqshl", [VerifyRuntimeMode]>;
@@ -1462,7 +1484,7 @@ multiclass SInstPairwise<string name, string types, string intrinsic, list<FlagT
   def _X   : SInst<name # "[_{d}]", "dPdd", types, MergeAny, intrinsic, flags>;
 }
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 defm SVADDP   : SInstPairwise<"svaddp",   "csliUcUsUiUl", "aarch64_sve_addp", [VerifyRuntimeMode]>;
 defm SVADDP_F : SInstPairwise<"svaddp",   "hfd",          "aarch64_sve_faddp", [VerifyRuntimeMode]>;
 defm SVMAXNMP : SInstPairwise<"svmaxnmp", "hfd",          "aarch64_sve_fmaxnmp", [VerifyRuntimeMode]>;
@@ -1478,7 +1500,7 @@ defm SVMINP_U : SInstPairwise<"svminp",   "UcUsUiUl",     "aarch64_sve_uminp", [
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Widening pairwise arithmetic
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil",    MergeOp1,  "aarch64_sve_sadalp", [VerifyRuntimeMode]>;
 def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil",    MergeAny,  "aarch64_sve_sadalp", [VerifyRuntimeMode]>;
 def SVADALP_S_Z : SInst<"svadalp[_{d}]", "dPdh", "sil",    MergeZero, "aarch64_sve_sadalp", [VerifyRuntimeMode]>;
@@ -1492,7 +1514,7 @@ def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_s
 // SVE2 - Bitwise ternary logical instructions
 //
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVBCAX  : SInst<"svbcax[_{d}]",  "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax", [VerifyRuntimeMode]>;
 def SVBSL   : SInst<"svbsl[_{d}]",   "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl", [VerifyRuntimeMode]>;
 def SVBSL1N : SInst<"svbsl1n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n", [VerifyRuntimeMode]>;
@@ -1512,7 +1534,7 @@ def SVXAR_N   : SInst<"svxar[_n_{d}]",   "dddi", "csilUcUsUiUl", MergeNone, "aar
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Large integer arithmetic
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVADCLB : SInst<"svadclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclb", [VerifyRuntimeMode]>;
 def SVADCLT : SInst<"svadclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclt", [VerifyRuntimeMode]>;
 def SVSBCLB : SInst<"svsbclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclb", [VerifyRuntimeMode]>;
@@ -1535,7 +1557,7 @@ def SVMUL_LANE_2 : SInst<"svmul_lane[_{d}]", "dddi",  "silUsUiUl", MergeNone, "a
 
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Uniform complex integer arithmetic
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVCADD             : SInst<"svcadd[_{d}]",          "dddi",   "csilUcUsUiUl", MergeNone, "aarch64_sve_cadd_x",           [VerifyRuntimeMode], [ImmCheck<2, ImmCheckComplexRot90_270>]>;
 def SVSQCADD           : SInst<"svqcadd[_{d}]",         "dddi",   "csil",         MergeNone, "aarch64_sve_sqcadd_x",         [VerifyRuntimeMode], [ImmCheck<2, ImmCheckComplexRot90_270>]>;
 def SVCMLA             : SInst<"svcmla[_{d}]",          "ddddi",  "csilUcUsUiUl", MergeNone, "aarch64_sve_cmla_x",           [VerifyRuntimeMode], [ImmCheck<3, ImmCheckComplexRotAll90>]>;
@@ -1564,7 +1586,7 @@ multiclass SInstWideDSPWide<string name, string types, string intrinsic> {
   def _N : SInst<name # "[_n_{d}]", "ddR", types, MergeNone, intrinsic, [VerifyRuntimeMode]>;
 }
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 defm SVABALB_S : SInstWideDSPAcc<"svabalb",   "sil",    "aarch64_sve_sabalb">;
 defm SVABALB_U : SInstWideDSPAcc<"svabalb",   "UsUiUl", "aarch64_sve_uabalb">;
 defm SVABALT_S : SInstWideDSPAcc<"svabalt",   "sil",    "aarch64_sve_sabalt">;
@@ -1643,7 +1665,7 @@ def SVQDMULLT_LANE : SInst<"svqdmullt_lane[_{d}]", "dhhi",  "il",   MergeNone, "
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Narrowing DSP operations
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVADDHNB   : SInst<"svaddhnb[_{d}]",     "hdd",  "silUsUiUl", MergeNone, "aarch64_sve_addhnb", [VerifyRuntimeMode]>;
 def SVADDHNT   : SInst<"svaddhnt[_{d}]",     "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnt", [VerifyRuntimeMode]>;
 def SVRADDHNB  : SInst<"svraddhnb[_{d}]",    "hdd",  "silUsUiUl", MergeNone, "aarch64_sve_raddhnb", [VerifyRuntimeMode]>;
@@ -1683,7 +1705,7 @@ def SVQRSHRNT_U  : SInst<"svqrshrnt[_n_{d}]",  "hhdi", "UsUiUl",    MergeNone, "
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Unary narrowing operations
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVQXTNB_S  : SInst<"svqxtnb[_{d}]",  "hd",  "sil",     MergeNone, "aarch64_sve_sqxtnb", [VerifyRuntimeMode]>;
 def SVQXTNB_U  : SInst<"svqxtnb[_{d}]",  "hd",  "UsUiUl",  MergeNone, "aarch64_sve_uqxtnb", [VerifyRuntimeMode]>;
 def SVQXTUNB_S : SInst<"svqxtunb[_{d}]", "ed",  "sil",     MergeNone, "aarch64_sve_sqxtunb", [VerifyRuntimeMode]>;
@@ -1696,7 +1718,7 @@ def SVQXTUNT_S : SInst<"svqxtunt[_{d}]", "eed", "sil",     MergeNone, "aarch64_s
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Widening complex integer arithmetic
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 defm SVADDLBT : SInstWideDSPLong<"svaddlbt", "sil", "aarch64_sve_saddlbt">;
 defm SVSUBLBT : SInstWideDSPLong<"svsublbt", "sil", "aarch64_sve_ssublbt">;
 defm SVSUBLTB : SInstWideDSPLong<"svsubltb", "sil", "aarch64_sve_ssubltb">;
@@ -1830,7 +1852,7 @@ def SVSTNT1W_SCATTER_INDEX_S : MInst<"svstnt1w_scatter[_{2}base]_index[_{d}]", "
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Polynomial arithmetic
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVEORBT         : SInst<"sveorbt[_{d}]",         "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt", [VerifyRuntimeMode]>;
 def SVEORBT_N       : SInst<"sveorbt[_n_{d}]",       "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt", [VerifyRuntimeMode]>;
 def SVEORTB         : SInst<"sveortb[_{d}]",         "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb", [VerifyRuntimeMode]>;
@@ -1850,7 +1872,7 @@ def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda",  "UcUi",         Mer
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Complex integer dot product
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVCDOT      : SInst<"svcdot[_{d}]",      "ddqqi",  "il",   MergeNone, "aarch64_sve_cdot",      [VerifyRuntimeMode], [ImmCheck<3, ImmCheckComplexRotAll90>]>;
 def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il",   MergeNone, "aarch64_sve_cdot_lane", [VerifyRuntimeMode], [ImmCheck<4, ImmCheckComplexRotAll90>,
                                                                                                         ImmCheck<3, ImmCheckLaneIndexDot, 2>]>;
@@ -1859,7 +1881,7 @@ def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il",   MergeNone, "aarch
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Floating-point widening multiply-accumulate
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVMLALB_F      : SInst<"svmlalb[_{d}]",      "ddhh",  "f",   MergeNone, "aarch64_sve_fmlalb", [VerifyRuntimeMode]>;
 def SVMLALB_F_N    : SInst<"svmlalb[_n_{d}]",    "ddhR",  "f",   MergeNone, "aarch64_sve_fmlalb", [VerifyRuntimeMode]>;
 def SVMLALB_F_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "f",   MergeNone, "aarch64_sve_fmlalb_lane", [VerifyRuntimeMode], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
@@ -1877,7 +1899,7 @@ def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f",   MergeNone, "aar
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Floating-point integer binary logarithm
 
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVLOGB_M  : SInst<"svlogb[_{d}]", "xxPd", "hfd", MergeOp1,     "aarch64_sve_flogb", [VerifyRuntimeMode]>;
 def SVLOGB_X  : SInst<"svlogb[_{d}]", "xPd",  "hfd", MergeAnyExp,  "aarch64_sve_flogb", [VerifyRuntimeMode]>;
 def SVLOGB_Z  : SInst<"svlogb[_{d}]", "xPd",  "hfd", MergeZeroExp, "aarch64_sve_flogb", [VerifyRuntimeMode]>;
@@ -1901,7 +1923,7 @@ def SVNMATCH : SInst<"svnmatch[_{d}]", "PPdd", "csUcUs", MergeNone, "aarch64_sve
 
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Contiguous conflict detection
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVWHILERW_B : SInst<"svwhilerw[_{1}]", "Pcc", "cUc",  MergeNone, "aarch64_sve_whilerw_b", [IsOverloadWhileRW, VerifyRuntimeMode]>;
 def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, VerifyRuntimeMode]>;
 def SVWHILERW_S : SInst<"svwhilerw[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilerw_s", [IsOverloadWhileRW, VerifyRuntimeMode]>;
@@ -1913,19 +1935,19 @@ def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sv
 def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW, VerifyRuntimeMode]>;
 }
 
-let TargetGuard = "sve2,bf16" in {
+let TargetGuard = "(sve2,bf16)|sme" in {
 def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, VerifyRuntimeMode]>;
 def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW, VerifyRuntimeMode]>;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Extended table lookup/permute
-let TargetGuard = "sve2" in {
+let TargetGuard = "sve2|sme" in {
 def SVTBL2 : SInst<"svtbl2[_{d}]", "d2u",  "csilUcUsUiUlhfd", MergeNone, "", [VerifyRuntimeMode]>;
 def SVTBX  : SInst<"svtbx[_{d}]",  "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx", [VerifyRuntimeMode]>;
 }
 
-let TargetGuard = "sve2,bf16" in {
+let TargetGuard = "(sve2,bf16)|sme" in {
 def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u",  "b", MergeNone, "", [VerifyRuntimeMode]>;
 def SVTBX_BF16  : SInst<"svtbx[_{d}]",  "dddu", "b", MergeNone, "aarch64_sve_tbx", [VerifyRuntimeMode]>;
 }
@@ -2233,15 +2255,7 @@ let TargetGuard = "sve2p1" in {
   def SVTBXQ : SInst<"svtbxq[_{d}]", "dddu", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_tbxq">;
   // EXTQ
   def EXTQ : SInst<"svextq[_{d}]", "dddk", "cUcsUsiUilUlbhfd", MergeNone, "aarch64_sve_extq", [], [ImmCheck<2, ImmCheck0_15>]>;
-  // DUPQ
-  def SVDUP_LANEQ_B  : SInst<"svdup_laneq[_{d}]", "ddi",  "cUc", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_15>]>;
-  def SVDUP_LANEQ_H  : SInst<"svdup_laneq[_{d}]", "ddi",  "sUsh", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>;
-  def SVDUP_LANEQ_S  : SInst<"svdup_laneq[_{d}]", "ddi",  "iUif", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
-  def SVDUP_LANEQ_D  : SInst<"svdup_laneq[_{d}]", "ddi",  "lUld", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
 
-  let TargetGuard = "bf16" in {
-    def SVDUP_LANEQ_BF16  : SInst<"svdup_laneq[_{d}]", "ddi",  "b", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>;
-  }
   // PMOV
   // Move to Pred
   multiclass PMOV_TO_PRED<string name, string types, string intrinsic, list<FlagType> flags=[], ImmCheckType immCh > {
@@ -2264,6 +2278,18 @@ let TargetGuard = "sve2p1" in {
   defm SVPMOV_TO_VEC_LANE_D : PMOV_TO_VEC<"svpmov", "lUl", "aarch64_sve_pmov_to_vector_lane" ,[], ImmCheck1_7>;
 }
 
+let TargetGuard = "sve2p1|sme2" in {
+  // DUPQ
+  def SVDUP_LANEQ_B  : SInst<"svdup_laneq[_{d}]", "ddi",  "cUc", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_15>]>;
+  def SVDUP_LANEQ_H  : SInst<"svdup_laneq[_{d}]", "ddi",  "sUsh", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>;
+  def SVDUP_LANEQ_S  : SInst<"svdup_laneq[_{d}]", "ddi",  "iUif", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_3>]>;
+  def SVDUP_LANEQ_D  : SInst<"svdup_laneq[_{d}]", "ddi",  "lUld", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "(sve2p1,bf16)|sme2" in {
+  def SVDUP_LANEQ_BF16  : SInst<"svdup_laneq[_{d}]", "ddi",  "b", MergeNone, "aarch64_sve_dup_laneq", [VerifyRuntimeMode], [ImmCheck<1, ImmCheck0_7>]>;
+}
+
 //
 // Multi-vector convert to/from floating-point.
 //
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
index 37e3925509836..707f445858067 100644
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -272,7 +272,7 @@ class Inst<string n, string p, string t, MergeType mt, string i,
   string Name = n;
   string Prototype = p;
   string Types = t;
-  string TargetGuard = "sve";
+  string TargetGuard = "sve|sme";
   int Merge = mt.Value;
   string MergeSuffix = mt.Suffix;
   string LLVMIntrinsic = i;
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
index 7d25914c73539..ad4b022b92640 100644
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -1781,7 +1781,12 @@ void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) {
   uint64_t VerifyRuntimeMode = getEnumValueForFlag("VerifyRuntimeMode");
   uint64_t IsStreamingCompatibleFlag =
       getEnumValueForFlag("IsStreamingCompatible");
+
   for (auto &Def : Defs) {
+    assert((((Def->getGuard().contains("sve") +
+              Def->getGuard().contains("sme")) <= 1) ||
+            Def->isFlagSet(VerifyRuntimeMode)) &&
+           "Missing VerifyRuntimeMode flag");
     if (Def->isFlagSet(IsStreamingFlag))
       StreamingMap["ArmStreaming"].insert(Def->getMangledName());
     else if (Def->isFlagSet(VerifyRuntimeMode))

From 880dc1b2e5c52c7704a61e78cf5b845ca6555edf Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Mon, 17 Jun 2024 14:22:59 +0100
Subject: [PATCH 2/2] Add RUN lines and fix up tests

---
 .../aarch64-sve-intrinsics/acle_sve_abd.c     |  140 +-
 .../aarch64-sve-intrinsics/acle_sve_abs.c     |   50 +-
 .../aarch64-sve-intrinsics/acle_sve_acge.c    |   18 +-
 .../aarch64-sve-intrinsics/acle_sve_acgt.c    |   18 +-
 .../aarch64-sve-intrinsics/acle_sve_acle.c    |   18 +-
 .../aarch64-sve-intrinsics/acle_sve_aclt.c    |   18 +-
 .../aarch64-sve-intrinsics/acle_sve_add.c     |  140 +-
 .../aarch64-sve-intrinsics/acle_sve_addv.c    |   30 +-
 .../aarch64-sve-intrinsics/acle_sve_and.c     |  106 +-
 .../aarch64-sve-intrinsics/acle_sve_andv.c    |   24 +-
 .../aarch64-sve-intrinsics/acle_sve_asr.c     |   92 +-
 .../aarch64-sve-intrinsics/acle_sve_asrd.c    |   40 +-
 .../aarch64-sve-intrinsics/acle_sve_bfdot.c   |   17 +-
 .../aarch64-sve-intrinsics/acle_sve_bfmlalb.c |   17 +-
 .../aarch64-sve-intrinsics/acle_sve_bfmlalt.c |   17 +-
 .../aarch64-sve-intrinsics/acle_sve_bic.c     |  106 +-
 .../aarch64-sve-intrinsics/acle_sve_brka.c    |   12 +-
 .../aarch64-sve-intrinsics/acle_sve_brkb.c    |   12 +-
 .../aarch64-sve-intrinsics/acle_sve_brkn.c    |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_brkpa.c   |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_brkpb.c   |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_cadd.c    |   28 +-
 .../acle_sve_clasta-bfloat.c                  |   12 +-
 .../aarch64-sve-intrinsics/acle_sve_clasta.c  |   52 +-
 .../acle_sve_clastb-bfloat.c                  |   12 +-
 .../aarch64-sve-intrinsics/acle_sve_clastb.c  |   52 +-
 .../aarch64-sve-intrinsics/acle_sve_cls.c     |   32 +-
 .../aarch64-sve-intrinsics/acle_sve_clz.c     |   56 +-
 .../aarch64-sve-intrinsics/acle_sve_cmla.c    |   40 +-
 .../aarch64-sve-intrinsics/acle_sve_cmpeq.c   |   64 +-
 .../aarch64-sve-intrinsics/acle_sve_cmpge.c   |   76 +-
 .../aarch64-sve-intrinsics/acle_sve_cmpgt.c   |   76 +-
 .../aarch64-sve-intrinsics/acle_sve_cmple.c   |   76 +-
 .../aarch64-sve-intrinsics/acle_sve_cmplt.c   |   76 +-
 .../aarch64-sve-intrinsics/acle_sve_cmpne.c   |   64 +-
 .../aarch64-sve-intrinsics/acle_sve_cmpuo.c   |   20 +-
 .../aarch64-sve-intrinsics/acle_sve_cnot.c    |   56 +-
 .../acle_sve_cnt-bfloat.c                     |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_cnt.c     |   74 +-
 .../aarch64-sve-intrinsics/acle_sve_cntb.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_cntd.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_cnth.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_cntp.c    |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_cntw.c    |   44 +-
 .../acle_sve_create2-bfloat.c                 |    6 +-
 .../aarch64-sve-intrinsics/acle_sve_create2.c |    4 +-
 .../acle_sve_create3-bfloat.c                 |    4 +-
 .../aarch64-sve-intrinsics/acle_sve_create3.c |    4 +-
 .../acle_sve_create4-bfloat.c                 |    4 +-
 .../aarch64-sve-intrinsics/acle_sve_create4.c |    4 +-
 .../acle_sve_cvt-bfloat.c                     |   17 +-
 .../aarch64-sve-intrinsics/acle_sve_cvt.c     |  200 +-
 .../aarch64-sve-intrinsics/acle_sve_cvtnt.c   |   15 +-
 .../aarch64-sve-intrinsics/acle_sve_div.c     |   92 +-
 .../aarch64-sve-intrinsics/acle_sve_divr.c    |   92 +-
 .../aarch64-sve-intrinsics/acle_sve_dot.c     |   36 +-
 .../acle_sve_dup-bfloat.c                     |   18 +-
 .../aarch64-sve-intrinsics/acle_sve_dup.c     |  126 +-
 .../acle_sve_dupq-bfloat.c                    |   12 +-
 .../aarch64-sve-intrinsics/acle_sve_dupq.c    |   60 +-
 .../aarch64-sve-intrinsics/acle_sve_eor.c     |  106 +-
 .../aarch64-sve-intrinsics/acle_sve_eorv.c    |   24 +-
 .../acle_sve_ext-bfloat.c                     |   12 +-
 .../aarch64-sve-intrinsics/acle_sve_ext.c     |   38 +-
 .../aarch64-sve-intrinsics/acle_sve_extb.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_exth.c    |   32 +-
 .../aarch64-sve-intrinsics/acle_sve_extw.c    |   20 +-
 .../acle_sve_get2-bfloat.c                    |    4 +-
 .../aarch64-sve-intrinsics/acle_sve_get2.c    |    2 +
 .../acle_sve_get3-bfloat.c                    |    4 +-
 .../aarch64-sve-intrinsics/acle_sve_get3.c    |    2 +
 .../acle_sve_get4-bfloat.c                    |    4 +-
 .../aarch64-sve-intrinsics/acle_sve_get4.c    |    2 +
 .../aarch64-sve-intrinsics/acle_sve_index.c   |   24 +-
 .../acle_sve_insr-bfloat.c                    |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_insr.c    |   30 +-
 .../acle_sve_lasta-bfloat.c                   |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_lasta.c   |   30 +-
 .../acle_sve_lastb-bfloat.c                   |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_lastb.c   |   30 +-
 .../acle_sve_ld1-bfloat.c                     |   15 +-
 .../aarch64-sve-intrinsics/acle_sve_ld1.c     |   55 +-
 .../acle_sve_ld1rq-bfloat.c                   |   13 +-
 .../aarch64-sve-intrinsics/acle_sve_ld1rq.c   |   30 +-
 .../aarch64-sve-intrinsics/acle_sve_ld1sb.c   |   36 +-
 .../aarch64-sve-intrinsics/acle_sve_ld1sh.c   |   28 +-
 .../aarch64-sve-intrinsics/acle_sve_ld1sw.c   |   20 +-
 .../aarch64-sve-intrinsics/acle_sve_ld1ub.c   |   36 +-
 .../aarch64-sve-intrinsics/acle_sve_ld1uh.c   |   28 +-
 .../aarch64-sve-intrinsics/acle_sve_ld1uw.c   |   20 +-
 .../acle_sve_ld2-bfloat.c                     |   15 +-
 .../aarch64-sve-intrinsics/acle_sve_ld2.c     |   56 +-
 .../acle_sve_ld3-bfloat.c                     |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_ld3.c     |   55 +-
 .../acle_sve_ld4-bfloat.c                     |   15 +-
 .../aarch64-sve-intrinsics/acle_sve_ld4.c     |   55 +-
 .../acle_sve_ldnt1-bfloat.c                   |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_ldnt1.c   |   52 +-
 .../acle_sve_len-bfloat.c                     |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_len.c     |   30 +-
 .../aarch64-sve-intrinsics/acle_sve_lsl.c     |  110 +-
 .../aarch64-sve-intrinsics/acle_sve_lsr.c     |   68 +-
 .../aarch64-sve-intrinsics/acle_sve_mad.c     |  140 +-
 .../aarch64-sve-intrinsics/acle_sve_max.c     |  140 +-
 .../aarch64-sve-intrinsics/acle_sve_maxnm.c   |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_maxnmv.c  |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_maxv.c    |   30 +-
 .../aarch64-sve-intrinsics/acle_sve_min.c     |  140 +-
 .../aarch64-sve-intrinsics/acle_sve_minnm.c   |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_minnmv.c  |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_minv.c    |   30 +-
 .../aarch64-sve-intrinsics/acle_sve_mla.c     |  152 +-
 .../aarch64-sve-intrinsics/acle_sve_mls.c     |  152 +-
 .../aarch64-sve-intrinsics/acle_sve_mov.c     |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_msb.c     |  140 +-
 .../aarch64-sve-intrinsics/acle_sve_mul.c     |  152 +-
 .../aarch64-sve-intrinsics/acle_sve_mulh.c    |  104 +-
 .../aarch64-sve-intrinsics/acle_sve_mulx.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_nand.c    |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_neg.c     |   50 +-
 .../aarch64-sve-intrinsics/acle_sve_nmad.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_nmla.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_nmls.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_nmsb.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_nor.c     |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_not.c     |   58 +-
 .../aarch64-sve-intrinsics/acle_sve_orn.c     |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_orr.c     |  106 +-
 .../aarch64-sve-intrinsics/acle_sve_orv.c     |   24 +-
 .../aarch64-sve-intrinsics/acle_sve_pfalse.c  |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_pfirst.c  |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_pnext.c   |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_prfb.c    |   38 +-
 .../aarch64-sve-intrinsics/acle_sve_prfd.c    |   38 +-
 .../aarch64-sve-intrinsics/acle_sve_prfh.c    |   38 +-
 .../aarch64-sve-intrinsics/acle_sve_prfw.c    |   38 +-
 .../aarch64-sve-intrinsics/acle_sve_ptest.c   |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_ptrue.c   |   56 +-
 .../aarch64-sve-intrinsics/acle_sve_qadd.c    |   40 +-
 .../aarch64-sve-intrinsics/acle_sve_qdecb.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_qdecd.c   |   34 +-
 .../aarch64-sve-intrinsics/acle_sve_qdech.c   |   34 +-
 .../aarch64-sve-intrinsics/acle_sve_qdecp.c   |   52 +-
 .../aarch64-sve-intrinsics/acle_sve_qdecw.c   |   34 +-
 .../aarch64-sve-intrinsics/acle_sve_qincb.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_qincd.c   |   34 +-
 .../aarch64-sve-intrinsics/acle_sve_qinch.c   |   34 +-
 .../aarch64-sve-intrinsics/acle_sve_qincp.c   |   52 +-
 .../aarch64-sve-intrinsics/acle_sve_qincw.c   |   34 +-
 .../aarch64-sve-intrinsics/acle_sve_qsub.c    |   40 +-
 .../aarch64-sve-intrinsics/acle_sve_rbit.c    |   56 +-
 .../aarch64-sve-intrinsics/acle_sve_recpe.c   |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_recps.c   |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_recpx.c   |   26 +-
 .../acle_sve_rev-bfloat.c                     |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_rev.c     |   38 +-
 .../aarch64-sve-intrinsics/acle_sve_revb.c    |   44 +-
 .../aarch64-sve-intrinsics/acle_sve_revh.c    |   32 +-
 .../aarch64-sve-intrinsics/acle_sve_revw.c    |   20 +-
 .../aarch64-sve-intrinsics/acle_sve_rinta.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_rinti.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_rintm.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_rintn.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_rintp.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_rintx.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_rintz.c   |   26 +-
 .../aarch64-sve-intrinsics/acle_sve_rsqrte.c  |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_rsqrts.c  |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_scale.c   |   44 +-
 .../acle_sve_sel-bfloat.c                     |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_sel.c     |   32 +-
 .../acle_sve_set2-bfloat.c                    |    4 +-
 .../aarch64-sve-intrinsics/acle_sve_set2.c    |    4 +-
 .../acle_sve_set3-bfloat.c                    |    4 +-
 .../aarch64-sve-intrinsics/acle_sve_set3.c    |    4 +-
 .../acle_sve_set4-bfloat.c                    |    4 +-
 .../aarch64-sve-intrinsics/acle_sve_set4.c    |    4 +-
 .../acle_sve_splice-bfloat.c                  |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_splice.c  |   30 +-
 .../aarch64-sve-intrinsics/acle_sve_sqrt.c    |   26 +-
 .../acle_sve_st1-bfloat.c                     |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_st1.c     |   56 +-
 .../aarch64-sve-intrinsics/acle_sve_st1b.c    |   37 +-
 .../aarch64-sve-intrinsics/acle_sve_st1h.c    |   29 +-
 .../aarch64-sve-intrinsics/acle_sve_st1w.c    |   20 +-
 .../acle_sve_st2-bfloat.c                     |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_st2.c     |   52 +-
 .../acle_sve_st3-bfloat.c                     |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_st3.c     |   52 +-
 .../acle_sve_st4-bfloat.c                     |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_st4.c     |   52 +-
 .../acle_sve_stnt1-bfloat.c                   |   16 +-
 .../aarch64-sve-intrinsics/acle_sve_stnt1.c   |   52 +-
 .../aarch64-sve-intrinsics/acle_sve_sub.c     |  140 +-
 .../aarch64-sve-intrinsics/acle_sve_subr.c    |  140 +-
 .../aarch64-sve-intrinsics/acle_sve_sudot.c   |   22 +-
 .../acle_sve_tbl-bfloat.c                     |   10 +-
 .../aarch64-sve-intrinsics/acle_sve_tbl.c     |   30 +-
 .../acle_sve_trn1-bfloat.c                    |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_trn1.c    |   38 +-
 .../acle_sve_trn2-bfloat.c                    |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_trn2.c    |   38 +-
 .../acle_sve_undef-bfloat.c                   |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_undef.c   |   30 +-
 .../acle_sve_undef2-bfloat.c                  |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_undef2.c  |   34 +-
 .../acle_sve_undef3-bfloat.c                  |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_undef3.c  |   34 +-
 .../acle_sve_undef4-bfloat.c                  |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_undef4.c  |   34 +-
 .../aarch64-sve-intrinsics/acle_sve_unpklo.c  |   22 +-
 .../aarch64-sve-intrinsics/acle_sve_usdot.c   |   21 +-
 .../acle_sve_uzp1-bfloat.c                    |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_uzp1.c    |   38 +-
 .../acle_sve_uzp2-bfloat.c                    |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_uzp2.c    |   38 +-
 .../aarch64-sve-intrinsics/acle_sve_whilele.c |   40 +-
 .../aarch64-sve-intrinsics/acle_sve_whilelt.c |   40 +-
 .../acle_sve_zip1-bfloat.c                    |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_zip1.c    |   38 +-
 .../acle_sve_zip2-bfloat.c                    |   14 +-
 .../aarch64-sve-intrinsics/acle_sve_zip2.c    |   38 +-
 .../aarch64-sve2-intrinsics/acle_sve2.cpp     | 5660 ++++++++---------
 .../acle_sve2_bfloat.cpp                      |   16 +-
 224 files changed, 7891 insertions(+), 6204 deletions(-)

diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c
index 18ef16feff3ec..f4ae472c9f82c 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abd.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svabd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svabd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svabd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svabd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svabd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svabd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svabd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svabd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svabd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svabd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svabd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svabd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svabd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svabd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svabd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svabd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svabd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svabd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svabd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svabd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svabd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svabd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svabd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svabd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svabd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svabd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svabd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svabd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svabd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svabd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svabd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svabd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svabd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svabd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svabd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svabd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svabd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svabd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svabd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svabd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svabd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svabd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svabd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svabd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svabd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svabd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svabd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svabd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svabd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svabd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svabd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svabd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svabd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svabd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svabd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svabd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svabd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svabd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svabd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svabd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svabd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svabd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svabd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svabd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svabd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svabd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svabd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svabd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svabd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svabd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svabd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svabd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svabd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svabd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svabd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svabd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svabd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svabd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svabd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svabd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svabd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svabd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svabd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svabd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svabd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svabd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svabd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svabd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svabd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svabd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svabd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svabd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svabd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svabd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svabd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svabd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svabd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svabd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svabd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svabd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svabd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svabd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svabd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svabd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svabd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svabd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svabd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svabd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svabd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svabd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svabd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svabd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svabd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svabd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svabd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svabd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svabd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svabd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svabd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svabd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svabd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svabd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svabd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svabd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svabd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svabd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svabd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svabd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svabd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svabd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svabd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svabd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svabd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svabd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svabd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svabd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svabd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svabd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svabd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svabd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svabd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svabd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svabd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_u64,_x,)(pg, op1, op2);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svabd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svabd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svabd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f16,_z,)(pg, op1, op2);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svabd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svabd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svabd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f32,_z,)(pg, op1, op2);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svabd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svabd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svabd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f64,_z,)(pg, op1, op2);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svabd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svabd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svabd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f16,_m,)(pg, op1, op2);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svabd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svabd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svabd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f32,_m,)(pg, op1, op2);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svabd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svabd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svabd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f64,_m,)(pg, op1, op2);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svabd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svabd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svabd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f16,_x,)(pg, op1, op2);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svabd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svabd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svabd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f32,_x,)(pg, op1, op2);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svabd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svabd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svabd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_f64,_x,)(pg, op1, op2);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svabd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svabd_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svabd_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f16,_z,)(pg, op1, op2);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svabd_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svabd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svabd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f32,_z,)(pg, op1, op2);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svabd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svabd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svabd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f64,_z,)(pg, op1, op2);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svabd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svabd_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svabd_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f16,_m,)(pg, op1, op2);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svabd_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svabd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svabd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f32,_m,)(pg, op1, op2);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svabd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svabd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svabd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f64,_m,)(pg, op1, op2);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svabd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svabd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svabd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f16,_x,)(pg, op1, op2);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svabd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svabd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svabd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f32,_x,)(pg, op1, op2);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svabd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svabd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svabd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabd,_n_f64,_x,)(pg, op1, op2);
 }
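
Every hunk in this file follows the same mechanical pattern: each test function gains a trailing MODE_ATTR, which the new preamble expands to __arm_streaming when the file is built with +sme (where the compiler predefines __ARM_FEATURE_SME) and to nothing under +sve. A minimal standalone sketch of the idiom, using an illustrative function name and the type-suffixed intrinsic spelling instead of the SVE_ACLE_FUNC macro:

    #include <arm_sve.h>

    #if defined __ARM_FEATURE_SME
    #define MODE_ATTR __arm_streaming
    #else
    #define MODE_ATTR
    #endif

    // Under +sme alone this is accepted because the function executes in
    // streaming mode; under +sve it is an ordinary non-streaming function.
    svint8_t abd_example(svbool_t pg, svint8_t a, svint8_t b) MODE_ATTR
    {
      return svabd_s8_m(pg, a, b);
    }

Note that the new +sme RUN lines in these tests compile with -S ... -o /dev/null, so they only verify that the intrinsics are accepted in streaming mode; the FileCheck'd IR is still produced by the +sve RUN lines.
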
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c
index fab20023844e4..d4125fa3ac995 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_abs.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svabs_s8_z(svbool_t pg, svint8_t op)
+svint8_t test_svabs_s8_z(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s8,_z,)(pg, op);
 }
@@ -41,7 +49,7 @@ svint8_t test_svabs_s8_z(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svabs_s16_z(svbool_t pg, svint16_t op)
+svint16_t test_svabs_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s16,_z,)(pg, op);
 }
@@ -58,7 +66,7 @@ svint16_t test_svabs_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svabs_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svabs_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s32,_z,)(pg, op);
 }
@@ -75,7 +83,7 @@ svint32_t test_svabs_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svabs_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svabs_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s64,_z,)(pg, op);
 }
@@ -90,7 +98,7 @@ svint64_t test_svabs_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svabs_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
+svint8_t test_svabs_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s8,_m,)(inactive, pg, op);
 }
@@ -107,7 +115,7 @@ svint8_t test_svabs_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svabs_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
+svint16_t test_svabs_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s16,_m,)(inactive, pg, op);
 }
@@ -124,7 +132,7 @@ svint16_t test_svabs_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svabs_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svabs_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s32,_m,)(inactive, pg, op);
 }
@@ -141,7 +149,7 @@ svint32_t test_svabs_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svabs_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svabs_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s64,_m,)(inactive, pg, op);
 }
@@ -156,7 +164,7 @@ svint64_t test_svabs_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svabs_s8_x(svbool_t pg, svint8_t op)
+svint8_t test_svabs_s8_x(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s8,_x,)(pg, op);
 }
@@ -173,7 +181,7 @@ svint8_t test_svabs_s8_x(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svabs_s16_x(svbool_t pg, svint16_t op)
+svint16_t test_svabs_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s16,_x,)(pg, op);
 }
@@ -190,7 +198,7 @@ svint16_t test_svabs_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svabs_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svabs_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s32,_x,)(pg, op);
 }
@@ -207,7 +215,7 @@ svint32_t test_svabs_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.abs.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svabs_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svabs_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_s64,_x,)(pg, op);
 }
@@ -224,7 +232,7 @@ svint64_t test_svabs_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svabs_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svabs_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f16,_z,)(pg, op);
 }
@@ -241,7 +249,7 @@ svfloat16_t test_svabs_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svabs_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svabs_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f32,_z,)(pg, op);
 }
@@ -258,7 +266,7 @@ svfloat32_t test_svabs_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svabs_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svabs_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f64,_z,)(pg, op);
 }
@@ -275,7 +283,7 @@ svfloat64_t test_svabs_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svabs_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svabs_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f16,_m,)(inactive, pg, op);
 }
@@ -292,7 +300,7 @@ svfloat16_t test_svabs_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svabs_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svabs_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f32,_m,)(inactive, pg, op);
 }
@@ -309,7 +317,7 @@ svfloat32_t test_svabs_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svabs_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svabs_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f64,_m,)(inactive, pg, op);
 }
@@ -326,7 +334,7 @@ svfloat64_t test_svabs_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svabs_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svabs_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f16,_x,)(pg, op);
 }
@@ -343,7 +351,7 @@ svfloat16_t test_svabs_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svabs_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svabs_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f32,_x,)(pg, op);
 }
@@ -360,7 +368,7 @@ svfloat32_t test_svabs_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svabs_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svabs_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svabs,_f64,_x,)(pg, op);
 }
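
The context lines above also show the SVE_ACLE_FUNC half of the scheme, which lets one source file exercise both the overloaded and the type-suffixed spelling of each intrinsic. A hedged illustration of the two expansions; the #else branch is elided from this excerpt, so the definition given for it here is an assumption inferred from the overloaded one:

    #ifdef SVE_OVERLOADED_FORMS
    #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
    #else
    /* Assumed non-overloaded counterpart (not shown in the excerpt). */
    #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
    #endif

    /* SVE_ACLE_FUNC(svabs,_s8,_z,)(pg, op) expands to either
       svabs_z(pg, op)      -- overloaded form
       svabs_s8_z(pg, op)   -- type-suffixed form               */

Either spelling resolves to the same builtin, so the single MODE_ATTR change covers both RUN configurations.
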
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c
index 0133223fb43e4..d51e8e0a946c7 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acge.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svacge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacge,_f16,,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svbool_t test_svacge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svacge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacge,_f32,,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svbool_t test_svacge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svacge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacge,_f64,,)(pg, op1, op2);
 }
@@ -89,7 +97,7 @@ svbool_t test_svacge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svacge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacge,_n_f32,,)(pg, op1, op2);
 }
@@ -112,7 +120,7 @@ svbool_t test_svacge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacge_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svacge_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacge,_n_f64,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c
index 1b1e6cbca8598..d1c780c50a1d2 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acgt.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacgt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svacgt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacgt,_f16,,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svbool_t test_svacgt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacgt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svacgt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacgt,_f32,,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svbool_t test_svacgt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacgt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svacgt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacgt,_f64,,)(pg, op1, op2);
 }
@@ -89,7 +97,7 @@ svbool_t test_svacgt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacgt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svacgt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacgt,_n_f32,,)(pg, op1, op2);
 }
@@ -112,7 +120,7 @@ svbool_t test_svacgt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacgt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svacgt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacgt,_n_f64,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acle.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acle.c
index 9cfb26eef2069..f1de4d195d5c4 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acle.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_acle.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacle_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svacle_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacle,_f16,,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svbool_t test_svacle_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacle_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svacle_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacle,_f32,,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svbool_t test_svacle_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacle_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svacle_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacle,_f64,,)(pg, op1, op2);
 }
@@ -89,7 +97,7 @@ svbool_t test_svacle_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacle_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svacle_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacle,_n_f32,,)(pg, op1, op2);
 }
@@ -112,7 +120,7 @@ svbool_t test_svacle_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svacle_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svacle_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svacle,_n_f64,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_aclt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_aclt.c
index 8c901d14a26fc..df3101f565e91 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_aclt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_aclt.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svaclt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svaclt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaclt,_f16,,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svbool_t test_svaclt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svaclt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svaclt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaclt,_f32,,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svbool_t test_svaclt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svaclt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svaclt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaclt,_f64,,)(pg, op1, op2);
 }
@@ -89,7 +97,7 @@ svbool_t test_svaclt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svaclt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svaclt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaclt,_n_f32,,)(pg, op1, op2);
 }
@@ -112,7 +120,7 @@ svbool_t test_svaclt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svaclt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svaclt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaclt,_n_f64,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
index 8844297ff9d88..f671a2f23ca3d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_add.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svadd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svadd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svadd_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svadd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svadd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svadd_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svadd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svadd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svadd_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svadd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svadd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svadd_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svadd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svadd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svadd_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svadd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svadd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svadd_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svadd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svadd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svadd_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svadd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svadd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svadd_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svadd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svadd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svadd_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svadd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svadd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svadd_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svadd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svadd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svadd_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svadd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svadd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svadd_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svadd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svadd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svadd_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svadd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svadd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svadd_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svadd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svadd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svadd_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svadd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svadd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svadd_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svadd_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svadd_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svadd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svadd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svadd_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svadd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svadd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svadd_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svadd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svadd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svadd_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svadd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svadd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svadd_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svadd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svadd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svadd_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svadd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svadd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svadd_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svadd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svadd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svadd_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svadd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svadd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svadd_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svadd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svadd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svadd_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svadd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svadd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svadd_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svadd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svadd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svadd_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svadd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svadd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svadd_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svadd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svadd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svadd_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svadd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svadd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svadd_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svadd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svadd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svadd_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svadd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svadd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svadd_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svadd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svadd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svadd_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svadd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svadd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svadd_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svadd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svadd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svadd_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svadd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svadd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svadd_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svadd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svadd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svadd_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svadd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svadd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svadd_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svadd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svadd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svadd_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svadd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svadd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svadd_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svadd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svadd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svadd_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svadd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svadd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svadd_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svadd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svadd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svadd_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svadd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svadd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svadd_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svadd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svadd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svadd_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_u64,_x,)(pg, op1, op2);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svadd_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f16,_z,)(pg, op1, op2);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f32,_z,)(pg, op1, op2);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f64,_z,)(pg, op1, op2);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f16,_m,)(pg, op1, op2);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f32,_m,)(pg, op1, op2);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f64,_m,)(pg, op1, op2);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f16,_x,)(pg, op1, op2);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f32,_x,)(pg, op1, op2);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_f64,_x,)(pg, op1, op2);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svadd_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svadd_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f16,_z,)(pg, op1, op2);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svadd_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svadd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svadd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f32,_z,)(pg, op1, op2);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svadd_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svadd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svadd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f64,_z,)(pg, op1, op2);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svadd_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svadd_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svadd_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f16,_m,)(pg, op1, op2);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svadd_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svadd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svadd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f32,_m,)(pg, op1, op2);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svadd_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svadd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svadd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f64,_m,)(pg, op1, op2);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svadd_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svadd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svadd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f16,_x,)(pg, op1, op2);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svadd_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svadd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svadd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f32,_x,)(pg, op1, op2);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svadd_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svadd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svadd_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svadd,_n_f64,_x,)(pg, op1, op2);
 }
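
The MODE_ATTR pattern above is what lets a single test source drive both RUN configurations: under +sve the macro expands to nothing, while under +sme it marks each test function __arm_streaming so the VerifyRuntimeMode intrinsics remain legal. A minimal standalone sketch of the same idea (not part of the patch; the function name is illustrative):

  #include <arm_sve.h>

  // Compiled with only SME available (e.g. -triple aarch64 -target-feature +sme),
  // a compatible SVE intrinsic is accepted because the caller is streaming.
  svint32_t add_z(svbool_t pg, svint32_t a, svint32_t b) __arm_streaming {
    return svadd_s32_z(pg, a, b);
  }
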
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c
index fdac8aafe2a1d..efd0046998957 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_addv.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.saddv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svaddv_s8(svbool_t pg, svint8_t op)
+int64_t test_svaddv_s8(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_s8,,)(pg, op);
 }
@@ -41,7 +49,7 @@ int64_t test_svaddv_s8(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.saddv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svaddv_s16(svbool_t pg, svint16_t op)
+int64_t test_svaddv_s16(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_s16,,)(pg, op);
 }
@@ -58,7 +66,7 @@ int64_t test_svaddv_s16(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.saddv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svaddv_s32(svbool_t pg, svint32_t op)
+int64_t test_svaddv_s32(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_s32,,)(pg, op);
 }
@@ -75,7 +83,7 @@ int64_t test_svaddv_s32(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.saddv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svaddv_s64(svbool_t pg, svint64_t op)
+int64_t test_svaddv_s64(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_s64,,)(pg, op);
 }
@@ -90,7 +98,7 @@ int64_t test_svaddv_s64(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uaddv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svaddv_u8(svbool_t pg, svuint8_t op)
+uint64_t test_svaddv_u8(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_u8,,)(pg, op);
 }
@@ -107,7 +115,7 @@ uint64_t test_svaddv_u8(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uaddv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svaddv_u16(svbool_t pg, svuint16_t op)
+uint64_t test_svaddv_u16(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_u16,,)(pg, op);
 }
@@ -124,7 +132,7 @@ uint64_t test_svaddv_u16(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uaddv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svaddv_u32(svbool_t pg, svuint32_t op)
+uint64_t test_svaddv_u32(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_u32,,)(pg, op);
 }
@@ -141,7 +149,7 @@ uint64_t test_svaddv_u32(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uaddv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svaddv_u64(svbool_t pg, svuint64_t op)
+uint64_t test_svaddv_u64(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_u64,,)(pg, op);
 }
@@ -158,7 +166,7 @@ uint64_t test_svaddv_u64(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.faddv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svaddv_f16(svbool_t pg, svfloat16_t op)
+float16_t test_svaddv_f16(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_f16,,)(pg, op);
 }
@@ -175,7 +183,7 @@ float16_t test_svaddv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svaddv_f32(svbool_t pg, svfloat32_t op)
+float32_t test_svaddv_f32(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_f32,,)(pg, op);
 }
@@ -192,7 +200,7 @@ float32_t test_svaddv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svaddv_f64(svbool_t pg, svfloat64_t op)
+float64_t test_svaddv_f64(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svaddv,_f64,,)(pg, op);
 }
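
For contrast, dropping the attribute should be rejected in the +sme-only configuration, since a non-streaming caller has no legal way to use SVE state (a hedged sketch, not from the patch):

  #include <arm_sve.h>

  // With -target-feature +sme and no +sve, this is expected to be diagnosed:
  // the builtin needs SVE, or a streaming caller.
  int64_t addv_bad(svbool_t pg, svint64_t op) {
    return svaddv_s64(pg, op);  // expected error: caller is not in streaming mode
  }
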
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c
index 6e02b018834f5..ccb8d6259a656 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_and.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svand_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svand_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svand_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svand_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svand_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svand_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svand_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svand_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svand_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svand_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svand_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svand_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svand_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svand_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svand_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svand_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svand_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svand_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svand_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svand_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svand_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svand_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svand_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svand_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svand_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svand_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svand_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svand_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svand_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svand_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svand_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svand_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svand_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svand_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svand_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svand_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svand_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svand_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svand_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svand_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svand_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svand_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svand_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svand_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svand_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svand_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svand_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svand_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svand_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svand_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svand_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svand_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svand_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svand_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svand_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svand_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svand_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svand_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svand_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svand_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svand_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svand_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svand_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svand_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svand_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svand_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svand_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svand_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svand_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svand_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svand_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svand_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svand_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svand_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svand_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svand_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svand_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svand_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svand_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svand_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svand_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svand_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svand_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svand_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svand_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svand_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svand_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svand_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svand_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svand_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svand_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svand_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svand_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svand_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svand_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svand_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svand_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svand_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svand_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svand_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svand_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svand_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svand_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svand_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svand_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svand_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svand_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svand_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svand_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svand_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svand_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svand_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svand_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svand_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svand_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svand_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svand_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svand_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svand_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svand_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svand_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svand_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svand_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svand_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svand_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svand_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svand_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svand_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svand_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svand_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svand_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svand_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svand_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svand_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svand_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svand_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svand_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svand_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svand_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svand_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svand_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svand_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svand_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_n_u64,_x,)(pg, op1, op2);
 }
@@ -944,7 +952,7 @@ svuint64_t test_svand_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svand_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svand_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svand,_b,_z,)(pg, op1, op2);
 }
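
Each test file in the patch follows the same recipe: one new RUN line that
compiles the file with only -target-feature +sme, plus a MODE_ATTR macro that
expands to __arm_streaming when __ARM_FEATURE_SME is defined (and to nothing
otherwise), appended to every test function. That puts the test functions in
streaming mode for the +sme run, the mode in which these VerifyRuntimeMode
intrinsics are callable without +sve. A minimal sketch of the idea, outside
the patch (the function name and operands below are illustrative only):

  #include <arm_sve.h>

  #if defined __ARM_FEATURE_SME
  #define MODE_ATTR __arm_streaming   // streaming mode required with only +sme
  #else
  #define MODE_ATTR                   // non-streaming is fine with +sve
  #endif

  // Illustrative function, not taken from the patch: uses the overloaded
  // svand_x form; compiles both under +sve (non-streaming) and under
  // +sme (streaming).
  svint8_t and_example(svbool_t pg, svint8_t x) MODE_ATTR
  {
    return svand_x(pg, x, x);
  }
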
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c
index ed5c9da5a47cc..73e7267c24ee8 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_andv.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_svandv_s8(svbool_t pg, svint8_t op)
+int8_t test_svandv_s8(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svandv,_s8,,)(pg, op);
 }
@@ -41,7 +49,7 @@ int8_t test_svandv_s8(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-int16_t test_svandv_s16(svbool_t pg, svint16_t op)
+int16_t test_svandv_s16(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svandv,_s16,,)(pg, op);
 }
@@ -58,7 +66,7 @@ int16_t test_svandv_s16(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svandv_s32(svbool_t pg, svint32_t op)
+int32_t test_svandv_s32(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svandv,_s32,,)(pg, op);
 }
@@ -75,7 +83,7 @@ int32_t test_svandv_s32(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svandv_s64(svbool_t pg, svint64_t op)
+int64_t test_svandv_s64(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svandv,_s64,,)(pg, op);
 }
@@ -90,7 +98,7 @@ int64_t test_svandv_s64(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.andv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_svandv_u8(svbool_t pg, svuint8_t op)
+uint8_t test_svandv_u8(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svandv,_u8,,)(pg, op);
 }
@@ -107,7 +115,7 @@ uint8_t test_svandv_u8(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.andv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-uint16_t test_svandv_u16(svbool_t pg, svuint16_t op)
+uint16_t test_svandv_u16(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svandv,_u16,,)(pg, op);
 }
@@ -124,7 +132,7 @@ uint16_t test_svandv_u16(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.andv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svandv_u32(svbool_t pg, svuint32_t op)
+uint32_t test_svandv_u32(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svandv,_u32,,)(pg, op);
 }
@@ -141,7 +149,7 @@ uint32_t test_svandv_u32(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.andv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svandv_u64(svbool_t pg, svuint64_t op)
+uint64_t test_svandv_u64(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svandv,_u64,,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c
index 073d3bb52c719..d68b5274c3f3a 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asr.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svasr_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2)
+svint8_t test_svasr_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svasr_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svasr_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2)
+svint16_t test_svasr_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svasr_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svasr_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2)
+svint32_t test_svasr_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svasr_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svasr_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2)
+svint64_t test_svasr_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s64,_z,)(pg, op1, op2);
 }
@@ -98,7 +106,7 @@ svint64_t test_svasr_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasr_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2)
+svint8_t test_svasr_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s8,_m,)(pg, op1, op2);
 }
@@ -115,7 +123,7 @@ svint8_t test_svasr_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasr_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2)
+svint16_t test_svasr_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s16,_m,)(pg, op1, op2);
 }
@@ -132,7 +140,7 @@ svint16_t test_svasr_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasr_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2)
+svint32_t test_svasr_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s32,_m,)(pg, op1, op2);
 }
@@ -149,7 +157,7 @@ svint32_t test_svasr_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svasr_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2)
+svint64_t test_svasr_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s64,_m,)(pg, op1, op2);
 }
@@ -164,7 +172,7 @@ svint64_t test_svasr_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasr_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2)
+svint8_t test_svasr_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s8,_x,)(pg, op1, op2);
 }
@@ -181,7 +189,7 @@ svint8_t test_svasr_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasr_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2)
+svint16_t test_svasr_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s16,_x,)(pg, op1, op2);
 }
@@ -198,7 +206,7 @@ svint16_t test_svasr_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasr_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2)
+svint32_t test_svasr_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s32,_x,)(pg, op1, op2);
 }
@@ -215,7 +223,7 @@ svint32_t test_svasr_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svasr_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2)
+svint64_t test_svasr_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_s64,_x,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svasr_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svasr_n_s64_z(svbool_t pg, svint64_t op1, uint64_t op2)
+svint64_t test_svasr_n_s64_z(svbool_t pg, svint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s64,_z,)(pg, op1, op2);
 }
@@ -259,7 +267,7 @@ svint64_t test_svasr_n_s64_z(svbool_t pg, svint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svasr_n_s64_m(svbool_t pg, svint64_t op1, uint64_t op2)
+svint64_t test_svasr_n_s64_m(svbool_t pg, svint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s64,_m,)(pg, op1, op2);
 }
@@ -280,7 +288,7 @@ svint64_t test_svasr_n_s64_m(svbool_t pg, svint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svasr_n_s64_x(svbool_t pg, svint64_t op1, uint64_t op2)
+svint64_t test_svasr_n_s64_x(svbool_t pg, svint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s64,_x,)(pg, op1, op2);
 }
@@ -297,7 +305,7 @@ svint64_t test_svasr_n_s64_x(svbool_t pg, svint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svasr_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2)
+svint8_t test_svasr_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s8,_z,)(pg, op1, op2);
 }
@@ -316,7 +324,7 @@ svint8_t test_svasr_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svasr_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2)
+svint16_t test_svasr_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s16,_z,)(pg, op1, op2);
 }
@@ -335,7 +343,7 @@ svint16_t test_svasr_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svasr_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2)
+svint32_t test_svasr_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s32,_z,)(pg, op1, op2);
 }
@@ -350,7 +358,7 @@ svint32_t test_svasr_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasr_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2)
+svint8_t test_svasr_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s8,_m,)(pg, op1, op2);
 }
@@ -367,7 +375,7 @@ svint8_t test_svasr_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasr_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2)
+svint16_t test_svasr_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s16,_m,)(pg, op1, op2);
 }
@@ -384,7 +392,7 @@ svint16_t test_svasr_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasr_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2)
+svint32_t test_svasr_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s32,_m,)(pg, op1, op2);
 }
@@ -399,7 +407,7 @@ svint32_t test_svasr_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasr_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2)
+svint8_t test_svasr_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s8,_x,)(pg, op1, op2);
 }
@@ -416,7 +424,7 @@ svint8_t test_svasr_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasr_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2)
+svint16_t test_svasr_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s16,_x,)(pg, op1, op2);
 }
@@ -433,7 +441,7 @@ svint16_t test_svasr_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasr_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2)
+svint32_t test_svasr_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_s32,_x,)(pg, op1, op2);
 }
@@ -454,7 +462,7 @@ svint32_t test_svasr_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svasr_n_s8_z(svbool_t pg, svint8_t op1, uint8_t op2)
+svint8_t test_svasr_n_s8_z(svbool_t pg, svint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s8,_z,)(pg, op1, op2);
 }
@@ -477,7 +485,7 @@ svint8_t test_svasr_n_s8_z(svbool_t pg, svint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svasr_n_s16_z(svbool_t pg, svint16_t op1, uint16_t op2)
+svint16_t test_svasr_n_s16_z(svbool_t pg, svint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s16,_z,)(pg, op1, op2);
 }
@@ -500,7 +508,7 @@ svint16_t test_svasr_n_s16_z(svbool_t pg, svint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svasr_n_s32_z(svbool_t pg, svint32_t op1, uint32_t op2)
+svint32_t test_svasr_n_s32_z(svbool_t pg, svint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s32,_z,)(pg, op1, op2);
 }
@@ -519,7 +527,7 @@ svint32_t test_svasr_n_s32_z(svbool_t pg, svint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasr_n_s8_m(svbool_t pg, svint8_t op1, uint8_t op2)
+svint8_t test_svasr_n_s8_m(svbool_t pg, svint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s8,_m,)(pg, op1, op2);
 }
@@ -540,7 +548,7 @@ svint8_t test_svasr_n_s8_m(svbool_t pg, svint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasr_n_s16_m(svbool_t pg, svint16_t op1, uint16_t op2)
+svint16_t test_svasr_n_s16_m(svbool_t pg, svint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s16,_m,)(pg, op1, op2);
 }
@@ -561,7 +569,7 @@ svint16_t test_svasr_n_s16_m(svbool_t pg, svint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasr_n_s32_m(svbool_t pg, svint32_t op1, uint32_t op2)
+svint32_t test_svasr_n_s32_m(svbool_t pg, svint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s32,_m,)(pg, op1, op2);
 }
@@ -580,7 +588,7 @@ svint32_t test_svasr_n_s32_m(svbool_t pg, svint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasr_n_s8_x(svbool_t pg, svint8_t op1, uint8_t op2)
+svint8_t test_svasr_n_s8_x(svbool_t pg, svint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s8,_x,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svint8_t test_svasr_n_s8_x(svbool_t pg, svint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasr_n_s16_x(svbool_t pg, svint16_t op1, uint16_t op2)
+svint16_t test_svasr_n_s16_x(svbool_t pg, svint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s16,_x,)(pg, op1, op2);
 }
@@ -622,7 +630,7 @@ svint16_t test_svasr_n_s16_x(svbool_t pg, svint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasr_n_s32_x(svbool_t pg, svint32_t op1, uint32_t op2)
+svint32_t test_svasr_n_s32_x(svbool_t pg, svint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr,_n_s32,_x,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint32_t test_svasr_n_s32_x(svbool_t pg, svint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasr_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2)
+svint8_t test_svasr_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s8,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint8_t test_svasr_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasr_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2)
+svint16_t test_svasr_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s16,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint16_t test_svasr_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasr_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2)
+svint32_t test_svasr_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s32,_m,)(pg, op1, op2);
 }
@@ -704,7 +712,7 @@ svint32_t test_svasr_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svasr_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2)
+svint8_t test_svasr_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s8,_z,)(pg, op1, op2);
 }
@@ -727,7 +735,7 @@ svint8_t test_svasr_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svasr_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2)
+svint16_t test_svasr_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s16,_z,)(pg, op1, op2);
 }
@@ -750,7 +758,7 @@ svint16_t test_svasr_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svasr_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2)
+svint32_t test_svasr_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s32,_z,)(pg, op1, op2);
 }
@@ -769,7 +777,7 @@ svint32_t test_svasr_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasr_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2)
+svint8_t test_svasr_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s8,_x,)(pg, op1, op2);
 }
@@ -790,7 +798,7 @@ svint8_t test_svasr_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasr_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2)
+svint16_t test_svasr_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s16,_x,)(pg, op1, op2);
 }
@@ -811,7 +819,7 @@ svint16_t test_svasr_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasr_wide_n_s32_x(svbool_t pg, svint32_t op1, uint64_t op2)
+svint32_t test_svasr_wide_n_s32_x(svbool_t pg, svint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasr_wide,_n_s32,_x,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c
index 87091eebe851c..4532f77cb4677 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_asrd.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svasrd_n_s8_z(svbool_t pg, svint8_t op1)
+svint8_t test_svasrd_n_s8_z(svbool_t pg, svint8_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s8,_z,)(pg, op1, 1);
 }
@@ -43,7 +51,7 @@ svint8_t test_svasrd_n_s8_z(svbool_t pg, svint8_t op1)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], i32 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svasrd_n_s8_z_1(svbool_t pg, svint8_t op1)
+svint8_t test_svasrd_n_s8_z_1(svbool_t pg, svint8_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s8,_z,)(pg, op1, 8);
 }
@@ -62,7 +70,7 @@ svint8_t test_svasrd_n_s8_z_1(svbool_t pg, svint8_t op1)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svasrd_n_s16_z(svbool_t pg, svint16_t op1)
+svint16_t test_svasrd_n_s16_z(svbool_t pg, svint16_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s16,_z,)(pg, op1, 1);
 }
@@ -81,7 +89,7 @@ svint16_t test_svasrd_n_s16_z(svbool_t pg, svint16_t op1)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svasrd_n_s16_z_1(svbool_t pg, svint16_t op1)
+svint16_t test_svasrd_n_s16_z_1(svbool_t pg, svint16_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s16,_z,)(pg, op1, 16);
 }
@@ -100,7 +108,7 @@ svint16_t test_svasrd_n_s16_z_1(svbool_t pg, svint16_t op1)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svasrd_n_s32_z(svbool_t pg, svint32_t op1)
+svint32_t test_svasrd_n_s32_z(svbool_t pg, svint32_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s32,_z,)(pg, op1, 1);
 }
@@ -119,7 +127,7 @@ svint32_t test_svasrd_n_s32_z(svbool_t pg, svint32_t op1)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], i32 32)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svasrd_n_s32_z_1(svbool_t pg, svint32_t op1)
+svint32_t test_svasrd_n_s32_z_1(svbool_t pg, svint32_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s32,_z,)(pg, op1, 32);
 }
@@ -138,7 +146,7 @@ svint32_t test_svasrd_n_s32_z_1(svbool_t pg, svint32_t op1)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svasrd_n_s64_z(svbool_t pg, svint64_t op1)
+svint64_t test_svasrd_n_s64_z(svbool_t pg, svint64_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s64,_z,)(pg, op1, 1);
 }
@@ -157,7 +165,7 @@ svint64_t test_svasrd_n_s64_z(svbool_t pg, svint64_t op1)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], i32 64)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svasrd_n_s64_z_1(svbool_t pg, svint64_t op1)
+svint64_t test_svasrd_n_s64_z_1(svbool_t pg, svint64_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s64,_z,)(pg, op1, 64);
 }
@@ -172,7 +180,7 @@ svint64_t test_svasrd_n_s64_z_1(svbool_t pg, svint64_t op1)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasrd_n_s8_m(svbool_t pg, svint8_t op1)
+svint8_t test_svasrd_n_s8_m(svbool_t pg, svint8_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s8,_m,)(pg, op1, 1);
 }
@@ -189,7 +197,7 @@ svint8_t test_svasrd_n_s8_m(svbool_t pg, svint8_t op1)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasrd_n_s16_m(svbool_t pg, svint16_t op1)
+svint16_t test_svasrd_n_s16_m(svbool_t pg, svint16_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s16,_m,)(pg, op1, 1);
 }
@@ -206,7 +214,7 @@ svint16_t test_svasrd_n_s16_m(svbool_t pg, svint16_t op1)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasrd_n_s32_m(svbool_t pg, svint32_t op1)
+svint32_t test_svasrd_n_s32_m(svbool_t pg, svint32_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s32,_m,)(pg, op1, 1);
 }
@@ -223,7 +231,7 @@ svint32_t test_svasrd_n_s32_m(svbool_t pg, svint32_t op1)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svasrd_n_s64_m(svbool_t pg, svint64_t op1)
+svint64_t test_svasrd_n_s64_m(svbool_t pg, svint64_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s64,_m,)(pg, op1, 1);
 }
@@ -238,7 +246,7 @@ svint64_t test_svasrd_n_s64_m(svbool_t pg, svint64_t op1)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], i32 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svasrd_n_s8_x(svbool_t pg, svint8_t op1)
+svint8_t test_svasrd_n_s8_x(svbool_t pg, svint8_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s8,_x,)(pg, op1, 8);
 }
@@ -255,7 +263,7 @@ svint8_t test_svasrd_n_s8_x(svbool_t pg, svint8_t op1)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svasrd_n_s16_x(svbool_t pg, svint16_t op1)
+svint16_t test_svasrd_n_s16_x(svbool_t pg, svint16_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s16,_x,)(pg, op1, 16);
 }
@@ -272,7 +280,7 @@ svint16_t test_svasrd_n_s16_x(svbool_t pg, svint16_t op1)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], i32 32)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svasrd_n_s32_x(svbool_t pg, svint32_t op1)
+svint32_t test_svasrd_n_s32_x(svbool_t pg, svint32_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s32,_x,)(pg, op1, 32);
 }
@@ -289,7 +297,7 @@ svint32_t test_svasrd_n_s32_x(svbool_t pg, svint32_t op1)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], i32 64)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svasrd_n_s64_x(svbool_t pg, svint64_t op1)
+svint64_t test_svasrd_n_s64_x(svbool_t pg, svint64_t op1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svasrd,_n_s64,_x,)(pg, op1, 64);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
index 6424c5fd6d03c..bbbd612507bcb 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfdot.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -25,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfdot_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_bfdot_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfdot, _f32, , )(x, y, z);
 }
 
@@ -39,7 +46,7 @@ svfloat32_t test_bfdot_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfdot_lane, _f32, , )(x, y, z, 0);
 }
 
@@ -53,7 +60,7 @@ svfloat32_t test_bfdot_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfdot_lane_3_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_bfdot_lane_3_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfdot_lane, _f32, , )(x, y, z, 3);
 }
 
@@ -71,6 +78,6 @@ svfloat32_t test_bfdot_lane_3_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfdot_n_f32(svfloat32_t x, svbfloat16_t y, bfloat16_t z) {
+svfloat32_t test_bfdot_n_f32(svfloat32_t x, svbfloat16_t y, bfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfdot, _n_f32, , )(x, y, z);
 }
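
(For context: the MODE_ATTR macro added above is what makes the same test body legal under both new RUN lines. A minimal, self-contained sketch of the pattern follows; it is not part of the patch, and svadd_f32_x is a standard ACLE intrinsic chosen only for illustration.)

  // Minimal sketch of the MODE_ATTR pattern; not part of the patch.
  #include <arm_sve.h>

  #if defined __ARM_FEATURE_SME
  #define MODE_ATTR __arm_streaming
  #else
  #define MODE_ATTR
  #endif

  // With -target-feature +sve, MODE_ATTR is empty; with +sme (and no +sve)
  // it expands to __arm_streaming, putting the function in streaming mode
  // so the SVE intrinsic below stays legal in both compilations.
  svfloat32_t add_f32(svbool_t pg, svfloat32_t a, svfloat32_t b) MODE_ATTR {
    return svadd_f32_x(pg, a, b);
  }
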
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
index 57c121742af6a..d0c9166a725bb 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalb.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -25,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svbfmlalb_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_svbfmlalb_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfmlalb, _f32, , )(x, y, z);
 }
 
@@ -39,7 +46,7 @@ svfloat32_t test_svbfmlalb_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfmlalb_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_bfmlalb_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfmlalb_lane, _f32, , )(x, y, z, 0);
 }
 
@@ -53,7 +60,7 @@ svfloat32_t test_bfmlalb_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfmlalb_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_bfmlalb_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfmlalb_lane, _f32, , )(x, y, z, 7);
 }
 
@@ -71,6 +78,6 @@ svfloat32_t test_bfmlalb_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfmlalb_n_f32(svfloat32_t x, svbfloat16_t y, bfloat16_t z) {
+svfloat32_t test_bfmlalb_n_f32(svfloat32_t x, svbfloat16_t y, bfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfmlalb, _n_f32, , )(x, y, z);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
index b1904ae4c0469..f7f9a9fef4925 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bfmlalt.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -25,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svbfmlalt_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_svbfmlalt_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfmlalt, _f32, , )(x, y, z);
 }
 
@@ -39,7 +46,7 @@ svfloat32_t test_svbfmlalt_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfmlalt_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_bfmlalt_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfmlalt_lane, _f32, , )(x, y, z, 0);
 }
 
@@ -53,7 +60,7 @@ svfloat32_t test_bfmlalt_lane_0_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane.v2(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[Z:%.*]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfmlalt_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) {
+svfloat32_t test_bfmlalt_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfmlalt_lane, _f32, , )(x, y, z, 7);
 }
 
@@ -71,6 +78,6 @@ svfloat32_t test_bfmlalt_lane_7_f32(svfloat32_t x, svbfloat16_t y, svbfloat16_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float> [[X:%.*]], <vscale x 8 x bfloat> [[Y:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_bfmlalt_n_f32(svfloat32_t x, svbfloat16_t y, bfloat16_t z) {
+svfloat32_t test_bfmlalt_n_f32(svfloat32_t x, svbfloat16_t y, bfloat16_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svbfmlalt, _n_f32, , )(x, y, z);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c
index bf5cf17dafa6d..597a130a16afc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_bic.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svbic_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svbic_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svbic_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svbic_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svbic_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svbic_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svbic_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svbic_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svbic_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svbic_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svbic_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svbic_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svbic_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svbic_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svbic_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svbic_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svbic_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svbic_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svbic_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svbic_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svbic_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svbic_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svbic_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svbic_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svbic_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svbic_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svbic_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svbic_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svbic_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svbic_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svbic_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svbic_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svbic_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svbic_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svbic_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svbic_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svbic_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svbic_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svbic_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svbic_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svbic_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svbic_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svbic_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svbic_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svbic_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svbic_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svbic_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svbic_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svbic_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svbic_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svbic_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svbic_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svbic_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svbic_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svbic_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svbic_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svbic_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svbic_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svbic_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svbic_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svbic_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svbic_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svbic_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svbic_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svbic_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svbic_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svbic_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svbic_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svbic_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svbic_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svbic_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svbic_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svbic_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svbic_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svbic_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svbic_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svbic_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svbic_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svbic_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svbic_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svbic_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svbic_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svbic_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svbic_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svbic_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svbic_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svbic_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svbic_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svbic_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svbic_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svbic_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svbic_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svbic_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svbic_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svbic_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svbic_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svbic_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svbic_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svbic_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svbic_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svbic_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svbic_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svbic_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svbic_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svbic_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svbic_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svbic_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svbic_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svbic_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svbic_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svbic_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svbic_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svbic_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svbic_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svbic_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svbic_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svbic_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svbic_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svbic_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svbic_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svbic_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svbic_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svbic_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svbic_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svbic_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svbic_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svbic_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svbic_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svbic_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svbic_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svbic_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svbic_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svbic_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svbic_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svbic_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svbic_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svbic_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svbic_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svbic_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svbic_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svbic_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svbic_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svbic_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_n_u64,_x,)(pg, op1, op2);
 }
@@ -944,7 +952,7 @@ svuint64_t test_svbic_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.bic.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svbic_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svbic_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbic,_b,_z,)(pg, op1, op2);
 }
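
(Note that the added +sme RUN line is compile-only, -S to /dev/null: it checks that these builtins are accepted in streaming mode rather than re-checking codegen. A hedged illustration of what the streaming attribute buys, not taken from the patch, using svbic_b_z from the test above:)

  // Hedged illustration, not part of the patch: under +sme (and no +sve),
  // SVE builtins such as svbic_b_z are only expected to be accepted in
  // streaming mode; dropping __arm_streaming here would be expected to be
  // diagnosed in that configuration.
  #include <arm_sve.h>

  svbool_t clear_bits(svbool_t pg, svbool_t a, svbool_t b) __arm_streaming {
    return svbic_b_z(pg, a, b);
  }
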
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c
index c07325f769dbf..15cc2b0d26084 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brka.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brka.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svbrka_b_z(svbool_t pg, svbool_t op)
+svbool_t test_svbrka_b_z(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbrka,_b,_z,)(pg, op);
 }
@@ -39,7 +47,7 @@ svbool_t test_svbrka_b_z(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brka.nxv16i1(<vscale x 16 x i1> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svbrka_b_m(svbool_t inactive, svbool_t pg, svbool_t op)
+svbool_t test_svbrka_b_m(svbool_t inactive, svbool_t pg, svbool_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbrka,_b,_m,)(inactive, pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c
index 0dbc7474bd3e6..50a9a1770edf6 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svbrkb_b_z(svbool_t pg, svbool_t op)
+svbool_t test_svbrkb_b_z(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbrkb,_b,_z,)(pg, op);
 }
@@ -39,7 +47,7 @@ svbool_t test_svbrkb_b_z(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkb.nxv16i1(<vscale x 16 x i1> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svbrkb_b_m(svbool_t inactive, svbool_t pg, svbool_t op)
+svbool_t test_svbrkb_b_m(svbool_t inactive, svbool_t pg, svbool_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbrkb,_b,_m,)(inactive, pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c
index 75fb8bf38eab2..f8d32d15d8f1e 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkn.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkn.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svbrkn_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svbrkn_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbrkn,_b,_z,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c
index d48a63f274991..6dcb5c699af2c 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpa.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpa.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svbrkpa_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svbrkpa_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbrkpa,_b,_z,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c
index bbe7963555d80..232b2aed5fcc3 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_brkpb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.brkpb.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svbrkpb_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svbrkpb_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svbrkpb,_b,_z,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c
index 35da7a47bab82..b588816a63ffc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cadd.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svcadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svcadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 90);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svcadd_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], i32 270)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svcadd_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svcadd_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f16,_z,)(pg, op1, op2, 270);
 }
@@ -66,7 +74,7 @@ svfloat16_t test_svcadd_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svcadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svcadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f32,_z,)(pg, op1, op2, 90);
 }
@@ -85,7 +93,7 @@ svfloat32_t test_svcadd_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svcadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svcadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f64,_z,)(pg, op1, op2, 90);
 }
@@ -102,7 +110,7 @@ svfloat64_t test_svcadd_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svcadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f16,_m,)(pg, op1, op2, 90);
 }
@@ -119,7 +127,7 @@ svfloat16_t test_svcadd_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svcadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f32,_m,)(pg, op1, op2, 90);
 }
@@ -136,7 +144,7 @@ svfloat32_t test_svcadd_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svcadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f64,_m,)(pg, op1, op2, 90);
 }
@@ -153,7 +161,7 @@ svfloat64_t test_svcadd_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcadd.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svcadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f16,_x,)(pg, op1, op2, 90);
 }
@@ -170,7 +178,7 @@ svfloat16_t test_svcadd_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcadd.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svcadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f32,_x,)(pg, op1, op2, 90);
 }
@@ -187,7 +195,7 @@ svfloat32_t test_svcadd_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcadd.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svcadd_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcadd,_f64,_x,)(pg, op1, op2, 90);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
index 66478f8bd0027..51f035906b21e 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[FALLBACK:%.*]], <vscale x 8 x bfloat> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) {
+svbfloat16_t test_svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svclasta_bf16'}}
   return SVE_ACLE_FUNC(svclasta, _bf16, , )(pg, fallback, data);
 }
@@ -44,7 +52,7 @@ svbfloat16_t test_svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat [[FALLBACK:%.*]], <vscale x 8 x bfloat> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret bfloat [[TMP1]]
 //
-bfloat16_t test_svclasta_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
+bfloat16_t test_svclasta_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svclasta_n_bf16'}}
   return SVE_ACLE_FUNC(svclasta, _n_bf16, , )(pg, fallback, data);
 }
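
(Illustration, not part of the patch: the MODE_ATTR pattern above mirrors how user code would use these intrinsics on an SME-only target. A minimal sketch, assuming a compiler with this change and only +sme enabled; the function name pick_last_active is hypothetical:

#include <arm_sve.h>

// With only +sme enabled, an SVE intrinsic such as svclasta_s8 is
// accepted here because the function executes in streaming mode.
svint8_t pick_last_active(svbool_t pg, svint8_t fallback, svint8_t data)
    __arm_streaming {
  return svclasta_s8(pg, fallback, data);
}

Compiled like the new RUN lines, i.e. -triple aarch64 -target-feature +sme, the call should lower to the same @llvm.aarch64.sve.clasta.nxv16i8 call the CHECK lines verify.)
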
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c
index 2c53197a37a18..4712d57be729b 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[FALLBACK:%.*]], <vscale x 16 x i8> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svclasta_s8(svbool_t pg, svint8_t fallback, svint8_t data)
+svint8_t test_svclasta_s8(svbool_t pg, svint8_t fallback, svint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_s8,,)(pg, fallback, data);
 }
@@ -41,7 +49,7 @@ svint8_t test_svclasta_s8(svbool_t pg, svint8_t fallback, svint8_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[FALLBACK:%.*]], <vscale x 8 x i16> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svclasta_s16(svbool_t pg, svint16_t fallback, svint16_t data)
+svint16_t test_svclasta_s16(svbool_t pg, svint16_t fallback, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_s16,,)(pg, fallback, data);
 }
@@ -58,7 +66,7 @@ svint16_t test_svclasta_s16(svbool_t pg, svint16_t fallback, svint16_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[FALLBACK:%.*]], <vscale x 4 x i32> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svclasta_s32(svbool_t pg, svint32_t fallback, svint32_t data)
+svint32_t test_svclasta_s32(svbool_t pg, svint32_t fallback, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_s32,,)(pg, fallback, data);
 }
@@ -75,7 +83,7 @@ svint32_t test_svclasta_s32(svbool_t pg, svint32_t fallback, svint32_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[FALLBACK:%.*]], <vscale x 2 x i64> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svclasta_s64(svbool_t pg, svint64_t fallback, svint64_t data)
+svint64_t test_svclasta_s64(svbool_t pg, svint64_t fallback, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_s64,,)(pg, fallback, data);
 }
@@ -90,7 +98,7 @@ svint64_t test_svclasta_s64(svbool_t pg, svint64_t fallback, svint64_t data)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clasta.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[FALLBACK:%.*]], <vscale x 16 x i8> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclasta_u8(svbool_t pg, svuint8_t fallback, svuint8_t data)
+svuint8_t test_svclasta_u8(svbool_t pg, svuint8_t fallback, svuint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_u8,,)(pg, fallback, data);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svclasta_u8(svbool_t pg, svuint8_t fallback, svuint8_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[FALLBACK:%.*]], <vscale x 8 x i16> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svclasta_u16(svbool_t pg, svuint16_t fallback, svuint16_t data)
+svuint16_t test_svclasta_u16(svbool_t pg, svuint16_t fallback, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_u16,,)(pg, fallback, data);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svclasta_u16(svbool_t pg, svuint16_t fallback, svuint16_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[FALLBACK:%.*]], <vscale x 4 x i32> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svclasta_u32(svbool_t pg, svuint32_t fallback, svuint32_t data)
+svuint32_t test_svclasta_u32(svbool_t pg, svuint32_t fallback, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_u32,,)(pg, fallback, data);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svclasta_u32(svbool_t pg, svuint32_t fallback, svuint32_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[FALLBACK:%.*]], <vscale x 2 x i64> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svclasta_u64(svbool_t pg, svuint64_t fallback, svuint64_t data)
+svuint64_t test_svclasta_u64(svbool_t pg, svuint64_t fallback, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_u64,,)(pg, fallback, data);
 }
@@ -158,7 +166,7 @@ svuint64_t test_svclasta_u64(svbool_t pg, svuint64_t fallback, svuint64_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[FALLBACK:%.*]], <vscale x 8 x half> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svclasta_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t data)
+svfloat16_t test_svclasta_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_f16,,)(pg, fallback, data);
 }
@@ -175,7 +183,7 @@ svfloat16_t test_svclasta_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t dat
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[FALLBACK:%.*]], <vscale x 4 x float> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svclasta_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t data)
+svfloat32_t test_svclasta_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_f32,,)(pg, fallback, data);
 }
@@ -192,7 +200,7 @@ svfloat32_t test_svclasta_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t dat
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[FALLBACK:%.*]], <vscale x 2 x double> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svclasta_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t data)
+svfloat64_t test_svclasta_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_f64,,)(pg, fallback, data);
 }
@@ -207,7 +215,7 @@ svfloat64_t test_svclasta_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t dat
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8 [[FALLBACK:%.*]], <vscale x 16 x i8> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_svclasta_n_s8(svbool_t pg, int8_t fallback, svint8_t data)
+int8_t test_svclasta_n_s8(svbool_t pg, int8_t fallback, svint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_s8,,)(pg, fallback, data);
 }
@@ -230,7 +238,7 @@ int8_t test_svclasta_n_s8(svbool_t pg, int8_t fallback, svint8_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[TMP3]] to i16
 // CPP-CHECK-NEXT:    ret i16 [[TMP4]]
 //
-int16_t test_svclasta_n_s16(svbool_t pg, int16_t fallback, svint16_t data)
+int16_t test_svclasta_n_s16(svbool_t pg, int16_t fallback, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_s16,,)(pg, fallback, data);
 }
@@ -253,7 +261,7 @@ int16_t test_svclasta_n_s16(svbool_t pg, int16_t fallback, svint16_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
 // CPP-CHECK-NEXT:    ret i32 [[TMP4]]
 //
-int32_t test_svclasta_n_s32(svbool_t pg, int32_t fallback, svint32_t data)
+int32_t test_svclasta_n_s32(svbool_t pg, int32_t fallback, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_s32,,)(pg, fallback, data);
 }
@@ -276,7 +284,7 @@ int32_t test_svclasta_n_s32(svbool_t pg, int32_t fallback, svint32_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
 // CPP-CHECK-NEXT:    ret i64 [[TMP4]]
 //
-int64_t test_svclasta_n_s64(svbool_t pg, int64_t fallback, svint64_t data)
+int64_t test_svclasta_n_s64(svbool_t pg, int64_t fallback, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_s64,,)(pg, fallback, data);
 }
@@ -291,7 +299,7 @@ int64_t test_svclasta_n_s64(svbool_t pg, int64_t fallback, svint64_t data)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.clasta.n.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8 [[FALLBACK:%.*]], <vscale x 16 x i8> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_svclasta_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data)
+uint8_t test_svclasta_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_u8,,)(pg, fallback, data);
 }
@@ -314,7 +322,7 @@ uint8_t test_svclasta_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[TMP3]] to i16
 // CPP-CHECK-NEXT:    ret i16 [[TMP4]]
 //
-uint16_t test_svclasta_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data)
+uint16_t test_svclasta_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_u16,,)(pg, fallback, data);
 }
@@ -337,7 +345,7 @@ uint16_t test_svclasta_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
 // CPP-CHECK-NEXT:    ret i32 [[TMP4]]
 //
-uint32_t test_svclasta_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data)
+uint32_t test_svclasta_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_u32,,)(pg, fallback, data);
 }
@@ -360,7 +368,7 @@ uint32_t test_svclasta_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
 // CPP-CHECK-NEXT:    ret i64 [[TMP4]]
 //
-uint64_t test_svclasta_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data)
+uint64_t test_svclasta_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_u64,,)(pg, fallback, data);
 }
@@ -377,7 +385,7 @@ uint64_t test_svclasta_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1> [[TMP0]], half [[FALLBACK:%.*]], <vscale x 8 x half> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svclasta_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data)
+float16_t test_svclasta_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_f16,,)(pg, fallback, data);
 }
@@ -394,7 +402,7 @@ float16_t test_svclasta_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1> [[TMP0]], float [[FALLBACK:%.*]], <vscale x 4 x float> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svclasta_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data)
+float32_t test_svclasta_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_f32,,)(pg, fallback, data);
 }
@@ -411,7 +419,7 @@ float32_t test_svclasta_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1> [[TMP0]], double [[FALLBACK:%.*]], <vscale x 2 x double> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svclasta_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data)
+float64_t test_svclasta_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclasta,_n_f64,,)(pg, fallback, data);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
index 5c78db73e6ccb..2ee31baf476a0 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[FALLBACK:%.*]], <vscale x 8 x bfloat> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) {
+svbfloat16_t test_svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svclastb_bf16'}}
   return SVE_ACLE_FUNC(svclastb, _bf16, , )(pg, fallback, data);
 }
@@ -44,7 +52,7 @@ svbfloat16_t test_svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> [[TMP0]], bfloat [[FALLBACK:%.*]], <vscale x 8 x bfloat> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret bfloat [[TMP1]]
 //
-bfloat16_t test_svclastb_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
+bfloat16_t test_svclastb_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svclastb_n_bf16'}}
   return SVE_ACLE_FUNC(svclastb, _n_bf16, , )(pg, fallback, data);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c
index 0d123f75e2218..caa5dd9381ab2 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[FALLBACK:%.*]], <vscale x 16 x i8> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svclastb_s8(svbool_t pg, svint8_t fallback, svint8_t data)
+svint8_t test_svclastb_s8(svbool_t pg, svint8_t fallback, svint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_s8,,)(pg, fallback, data);
 }
@@ -41,7 +49,7 @@ svint8_t test_svclastb_s8(svbool_t pg, svint8_t fallback, svint8_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[FALLBACK:%.*]], <vscale x 8 x i16> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svclastb_s16(svbool_t pg, svint16_t fallback, svint16_t data)
+svint16_t test_svclastb_s16(svbool_t pg, svint16_t fallback, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_s16,,)(pg, fallback, data);
 }
@@ -58,7 +66,7 @@ svint16_t test_svclastb_s16(svbool_t pg, svint16_t fallback, svint16_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[FALLBACK:%.*]], <vscale x 4 x i32> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svclastb_s32(svbool_t pg, svint32_t fallback, svint32_t data)
+svint32_t test_svclastb_s32(svbool_t pg, svint32_t fallback, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_s32,,)(pg, fallback, data);
 }
@@ -75,7 +83,7 @@ svint32_t test_svclastb_s32(svbool_t pg, svint32_t fallback, svint32_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[FALLBACK:%.*]], <vscale x 2 x i64> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svclastb_s64(svbool_t pg, svint64_t fallback, svint64_t data)
+svint64_t test_svclastb_s64(svbool_t pg, svint64_t fallback, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_s64,,)(pg, fallback, data);
 }
@@ -90,7 +98,7 @@ svint64_t test_svclastb_s64(svbool_t pg, svint64_t fallback, svint64_t data)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[FALLBACK:%.*]], <vscale x 16 x i8> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclastb_u8(svbool_t pg, svuint8_t fallback, svuint8_t data)
+svuint8_t test_svclastb_u8(svbool_t pg, svuint8_t fallback, svuint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_u8,,)(pg, fallback, data);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svclastb_u8(svbool_t pg, svuint8_t fallback, svuint8_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[FALLBACK:%.*]], <vscale x 8 x i16> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svclastb_u16(svbool_t pg, svuint16_t fallback, svuint16_t data)
+svuint16_t test_svclastb_u16(svbool_t pg, svuint16_t fallback, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_u16,,)(pg, fallback, data);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svclastb_u16(svbool_t pg, svuint16_t fallback, svuint16_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[FALLBACK:%.*]], <vscale x 4 x i32> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svclastb_u32(svbool_t pg, svuint32_t fallback, svuint32_t data)
+svuint32_t test_svclastb_u32(svbool_t pg, svuint32_t fallback, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_u32,,)(pg, fallback, data);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svclastb_u32(svbool_t pg, svuint32_t fallback, svuint32_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[FALLBACK:%.*]], <vscale x 2 x i64> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svclastb_u64(svbool_t pg, svuint64_t fallback, svuint64_t data)
+svuint64_t test_svclastb_u64(svbool_t pg, svuint64_t fallback, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_u64,,)(pg, fallback, data);
 }
@@ -158,7 +166,7 @@ svuint64_t test_svclastb_u64(svbool_t pg, svuint64_t fallback, svuint64_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[FALLBACK:%.*]], <vscale x 8 x half> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svclastb_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t data)
+svfloat16_t test_svclastb_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_f16,,)(pg, fallback, data);
 }
@@ -175,7 +183,7 @@ svfloat16_t test_svclastb_f16(svbool_t pg, svfloat16_t fallback, svfloat16_t dat
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[FALLBACK:%.*]], <vscale x 4 x float> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svclastb_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t data)
+svfloat32_t test_svclastb_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_f32,,)(pg, fallback, data);
 }
@@ -192,7 +200,7 @@ svfloat32_t test_svclastb_f32(svbool_t pg, svfloat32_t fallback, svfloat32_t dat
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[FALLBACK:%.*]], <vscale x 2 x double> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svclastb_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t data)
+svfloat64_t test_svclastb_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_f64,,)(pg, fallback, data);
 }
@@ -207,7 +215,7 @@ svfloat64_t test_svclastb_f64(svbool_t pg, svfloat64_t fallback, svfloat64_t dat
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8 [[FALLBACK:%.*]], <vscale x 16 x i8> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_svclastb_n_s8(svbool_t pg, int8_t fallback, svint8_t data)
+int8_t test_svclastb_n_s8(svbool_t pg, int8_t fallback, svint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_s8,,)(pg, fallback, data);
 }
@@ -230,7 +238,7 @@ int8_t test_svclastb_n_s8(svbool_t pg, int8_t fallback, svint8_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[TMP3]] to i16
 // CPP-CHECK-NEXT:    ret i16 [[TMP4]]
 //
-int16_t test_svclastb_n_s16(svbool_t pg, int16_t fallback, svint16_t data)
+int16_t test_svclastb_n_s16(svbool_t pg, int16_t fallback, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_s16,,)(pg, fallback, data);
 }
@@ -253,7 +261,7 @@ int16_t test_svclastb_n_s16(svbool_t pg, int16_t fallback, svint16_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
 // CPP-CHECK-NEXT:    ret i32 [[TMP4]]
 //
-int32_t test_svclastb_n_s32(svbool_t pg, int32_t fallback, svint32_t data)
+int32_t test_svclastb_n_s32(svbool_t pg, int32_t fallback, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_s32,,)(pg, fallback, data);
 }
@@ -276,7 +284,7 @@ int32_t test_svclastb_n_s32(svbool_t pg, int32_t fallback, svint32_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
 // CPP-CHECK-NEXT:    ret i64 [[TMP4]]
 //
-int64_t test_svclastb_n_s64(svbool_t pg, int64_t fallback, svint64_t data)
+int64_t test_svclastb_n_s64(svbool_t pg, int64_t fallback, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_s64,,)(pg, fallback, data);
 }
@@ -291,7 +299,7 @@ int64_t test_svclastb_n_s64(svbool_t pg, int64_t fallback, svint64_t data)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.clastb.n.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], i8 [[FALLBACK:%.*]], <vscale x 16 x i8> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_svclastb_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data)
+uint8_t test_svclastb_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_u8,,)(pg, fallback, data);
 }
@@ -314,7 +322,7 @@ uint8_t test_svclastb_n_u8(svbool_t pg, uint8_t fallback, svuint8_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast half [[TMP3]] to i16
 // CPP-CHECK-NEXT:    ret i16 [[TMP4]]
 //
-uint16_t test_svclastb_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data)
+uint16_t test_svclastb_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_u16,,)(pg, fallback, data);
 }
@@ -337,7 +345,7 @@ uint16_t test_svclastb_n_u16(svbool_t pg, uint16_t fallback, svuint16_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast float [[TMP3]] to i32
 // CPP-CHECK-NEXT:    ret i32 [[TMP4]]
 //
-uint32_t test_svclastb_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data)
+uint32_t test_svclastb_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_u32,,)(pg, fallback, data);
 }
@@ -360,7 +368,7 @@ uint32_t test_svclastb_n_u32(svbool_t pg, uint32_t fallback, svuint32_t data)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = bitcast double [[TMP3]] to i64
 // CPP-CHECK-NEXT:    ret i64 [[TMP4]]
 //
-uint64_t test_svclastb_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data)
+uint64_t test_svclastb_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_u64,,)(pg, fallback, data);
 }
@@ -377,7 +385,7 @@ uint64_t test_svclastb_n_u64(svbool_t pg, uint64_t fallback, svuint64_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1> [[TMP0]], half [[FALLBACK:%.*]], <vscale x 8 x half> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svclastb_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data)
+float16_t test_svclastb_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_f16,,)(pg, fallback, data);
 }
@@ -394,7 +402,7 @@ float16_t test_svclastb_n_f16(svbool_t pg, float16_t fallback, svfloat16_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1> [[TMP0]], float [[FALLBACK:%.*]], <vscale x 4 x float> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svclastb_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data)
+float32_t test_svclastb_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_f32,,)(pg, fallback, data);
 }
@@ -411,7 +419,7 @@ float32_t test_svclastb_n_f32(svbool_t pg, float32_t fallback, svfloat32_t data)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1> [[TMP0]], double [[FALLBACK:%.*]], <vscale x 2 x double> [[DATA:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svclastb_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data)
+float64_t test_svclastb_n_f64(svbool_t pg, float64_t fallback, svfloat64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclastb,_n_f64,,)(pg, fallback, data);
 }
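
(Likewise illustrative: when SVE_OVERLOADED_FORMS is defined, SVE_ACLE_FUNC drops the type suffix, so the same tests also cover the type-generic spellings. A sketch under the same assumptions as above, with pick_last_active again a hypothetical name:

#include <arm_sve.h>

svint8_t pick_last_active(svbool_t pg, svint8_t fallback, svint8_t data)
    __arm_streaming {
  // Type-generic form; resolves to svclastb_s8 for svint8_t operands.
  return svclastb(pg, fallback, data);
}
)
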
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c
index 5936b976e0ac5..7546593cc1f24 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cls.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcls_s8_z(svbool_t pg, svint8_t op)
+svuint8_t test_svcls_s8_z(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s8,_z,)(pg, op);
 }
@@ -41,7 +49,7 @@ svuint8_t test_svcls_s8_z(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcls_s16_z(svbool_t pg, svint16_t op)
+svuint16_t test_svcls_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s16,_z,)(pg, op);
 }
@@ -58,7 +66,7 @@ svuint16_t test_svcls_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcls_s32_z(svbool_t pg, svint32_t op)
+svuint32_t test_svcls_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s32,_z,)(pg, op);
 }
@@ -75,7 +83,7 @@ svuint32_t test_svcls_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcls_s64_z(svbool_t pg, svint64_t op)
+svuint64_t test_svcls_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s64,_z,)(pg, op);
 }
@@ -90,7 +98,7 @@ svuint64_t test_svcls_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcls_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op)
+svuint8_t test_svcls_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s8,_m,)(inactive, pg, op);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svcls_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcls_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op)
+svuint16_t test_svcls_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s16,_m,)(inactive, pg, op);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svcls_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcls_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op)
+svuint32_t test_svcls_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s32,_m,)(inactive, pg, op);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svcls_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcls_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op)
+svuint64_t test_svcls_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s64,_m,)(inactive, pg, op);
 }
@@ -156,7 +164,7 @@ svuint64_t test_svcls_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cls.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcls_s8_x(svbool_t pg, svint8_t op)
+svuint8_t test_svcls_s8_x(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s8,_x,)(pg, op);
 }
@@ -173,7 +181,7 @@ svuint8_t test_svcls_s8_x(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cls.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcls_s16_x(svbool_t pg, svint16_t op)
+svuint16_t test_svcls_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s16,_x,)(pg, op);
 }
@@ -190,7 +198,7 @@ svuint16_t test_svcls_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcls_s32_x(svbool_t pg, svint32_t op)
+svuint32_t test_svcls_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s32,_x,)(pg, op);
 }
@@ -207,7 +215,7 @@ svuint32_t test_svcls_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cls.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcls_s64_x(svbool_t pg, svint64_t op)
+svuint64_t test_svcls_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcls,_s64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c
index 239e6ad5584bf..833d9ddc61bae 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clz.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclz_s8_z(svbool_t pg, svint8_t op)
+svuint8_t test_svclz_s8_z(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s8,_z,)(pg, op);
 }
@@ -41,7 +49,7 @@ svuint8_t test_svclz_s8_z(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svclz_s16_z(svbool_t pg, svint16_t op)
+svuint16_t test_svclz_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s16,_z,)(pg, op);
 }
@@ -58,7 +66,7 @@ svuint16_t test_svclz_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svclz_s32_z(svbool_t pg, svint32_t op)
+svuint32_t test_svclz_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s32,_z,)(pg, op);
 }
@@ -75,7 +83,7 @@ svuint32_t test_svclz_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svclz_s64_z(svbool_t pg, svint64_t op)
+svuint64_t test_svclz_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s64,_z,)(pg, op);
 }
@@ -90,7 +98,7 @@ svuint64_t test_svclz_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclz_u8_z(svbool_t pg, svuint8_t op)
+svuint8_t test_svclz_u8_z(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u8,_z,)(pg, op);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svclz_u8_z(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svclz_u16_z(svbool_t pg, svuint16_t op)
+svuint16_t test_svclz_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u16,_z,)(pg, op);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svclz_u16_z(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svclz_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svclz_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u32,_z,)(pg, op);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svclz_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svclz_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svclz_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u64,_z,)(pg, op);
 }
@@ -156,7 +164,7 @@ svuint64_t test_svclz_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclz_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op)
+svuint8_t test_svclz_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s8,_m,)(inactive, pg, op);
 }
@@ -173,7 +181,7 @@ svuint8_t test_svclz_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svclz_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op)
+svuint16_t test_svclz_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s16,_m,)(inactive, pg, op);
 }
@@ -190,7 +198,7 @@ svuint16_t test_svclz_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svclz_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op)
+svuint32_t test_svclz_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s32,_m,)(inactive, pg, op);
 }
@@ -207,7 +215,7 @@ svuint32_t test_svclz_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svclz_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op)
+svuint64_t test_svclz_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s64,_m,)(inactive, pg, op);
 }
@@ -222,7 +230,7 @@ svuint64_t test_svclz_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclz_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
+svuint8_t test_svclz_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u8,_m,)(inactive, pg, op);
 }
@@ -239,7 +247,7 @@ svuint8_t test_svclz_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svclz_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
+svuint16_t test_svclz_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u16,_m,)(inactive, pg, op);
 }
@@ -256,7 +264,7 @@ svuint16_t test_svclz_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svclz_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svclz_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u32,_m,)(inactive, pg, op);
 }
@@ -273,7 +281,7 @@ svuint32_t test_svclz_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svclz_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svclz_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u64,_m,)(inactive, pg, op);
 }
@@ -288,7 +296,7 @@ svuint64_t test_svclz_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclz_s8_x(svbool_t pg, svint8_t op)
+svuint8_t test_svclz_s8_x(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s8,_x,)(pg, op);
 }
@@ -305,7 +313,7 @@ svuint8_t test_svclz_s8_x(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svclz_s16_x(svbool_t pg, svint16_t op)
+svuint16_t test_svclz_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s16,_x,)(pg, op);
 }
@@ -322,7 +330,7 @@ svuint16_t test_svclz_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svclz_s32_x(svbool_t pg, svint32_t op)
+svuint32_t test_svclz_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s32,_x,)(pg, op);
 }
@@ -339,7 +347,7 @@ svuint32_t test_svclz_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svclz_s64_x(svbool_t pg, svint64_t op)
+svuint64_t test_svclz_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_s64,_x,)(pg, op);
 }
@@ -354,7 +362,7 @@ svuint64_t test_svclz_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.clz.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svclz_u8_x(svbool_t pg, svuint8_t op)
+svuint8_t test_svclz_u8_x(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u8,_x,)(pg, op);
 }
@@ -371,7 +379,7 @@ svuint8_t test_svclz_u8_x(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.clz.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svclz_u16_x(svbool_t pg, svuint16_t op)
+svuint16_t test_svclz_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u16,_x,)(pg, op);
 }
@@ -388,7 +396,7 @@ svuint16_t test_svclz_u16_x(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svclz_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svclz_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u32,_x,)(pg, op);
 }
@@ -405,7 +413,7 @@ svuint32_t test_svclz_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.clz.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svclz_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svclz_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svclz,_u64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
index 4dc2c79cdb89b..fc61b6f8ff9f8 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmla.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svcmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svcmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 0);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svcmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svcmla_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svcmla_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 90);
 }
@@ -66,7 +74,7 @@ svfloat16_t test_svcmla_f16_z_1(svbool_t pg, svfloat16_t op1, svfloat16_t op2, s
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 180)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svcmla_f16_z_2(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svcmla_f16_z_2(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 180);
 }
@@ -85,7 +93,7 @@ svfloat16_t test_svcmla_f16_z_2(svbool_t pg, svfloat16_t op1, svfloat16_t op2, s
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 270)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svcmla_f16_z_3(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svcmla_f16_z_3(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f16,_z,)(pg, op1, op2, op3, 270);
 }
@@ -104,7 +112,7 @@ svfloat16_t test_svcmla_f16_z_3(svbool_t pg, svfloat16_t op1, svfloat16_t op2, s
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svcmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svcmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f32,_z,)(pg, op1, op2, op3, 0);
 }
@@ -123,7 +131,7 @@ svfloat32_t test_svcmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svcmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svcmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f64,_z,)(pg, op1, op2, op3, 90);
 }
@@ -140,7 +148,7 @@ svfloat64_t test_svcmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 180)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f16,_m,)(pg, op1, op2, op3, 180);
 }
@@ -157,7 +165,7 @@ svfloat16_t test_svcmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 270)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f32,_m,)(pg, op1, op2, op3, 270);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svcmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f64,_m,)(pg, op1, op2, op3, 0);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svcmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svcmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f16,_x,)(pg, op1, op2, op3, 90);
 }
@@ -208,7 +216,7 @@ svfloat16_t test_svcmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 180)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svcmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f32,_x,)(pg, op1, op2, op3, 180);
 }
@@ -225,7 +233,7 @@ svfloat32_t test_svcmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]], i32 270)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svcmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla,_f64,_x,)(pg, op1, op2, op3, 270);
 }
@@ -240,7 +248,7 @@ svfloat64_t test_svcmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 0, i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 0, 0);
 }
@@ -255,7 +263,7 @@ svfloat16_t test_svcmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t o
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcmla.lane.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 3, i32 90)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svcmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svcmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla_lane,_f16,,)(op1, op2, op3, 3, 90);
 }
@@ -270,7 +278,7 @@ svfloat16_t test_svcmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 0, i32 180)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 0, 180);
 }
@@ -285,7 +293,7 @@ svfloat32_t test_svcmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t o
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcmla.lane.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 1, i32 270)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svcmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svcmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmla_lane,_f32,,)(op1, op2, op3, 1, 270);
 }
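
The same mechanical change repeats across the comparison tests below: each file gains a RUN line that compiles with only +sme, and a MODE_ATTR macro that expands to __arm_streaming whenever __ARM_FEATURE_SME is defined, so that in the SME-only run every test function is in streaming mode. A minimal sketch of the user-facing effect (the function name and the choice of svadd_s8_x are illustrative, not taken from the patch):

    #include <arm_sve.h>

    /* Compiled with only +sme (as in the new RUN lines, i.e.
       "-triple aarch64 -target-feature +sme"), this is accepted because
       the function is in streaming mode and svadd_s8_x is assumed here
       to be one of the streaming-compatible SVE intrinsics the patch
       exposes. */
    svint8_t add_in_streaming_mode(svbool_t pg, svint8_t a, svint8_t b)
        __arm_streaming {
      return svadd_s8_x(pg, a, b);
    }
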
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpeq.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpeq.c
index fa80e58a9e375..ee96d2b8fbc65 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpeq.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpeq.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpeq_s8(svbool_t pg, svint8_t op1, svint8_t op2)
+svbool_t test_svcmpeq_s8(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_s8,,)(pg, op1, op2);
 }
@@ -43,7 +51,7 @@ svbool_t test_svcmpeq_s8(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_s16(svbool_t pg, svint16_t op1, svint16_t op2)
+svbool_t test_svcmpeq_s16(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_s16,,)(pg, op1, op2);
 }
@@ -62,7 +70,7 @@ svbool_t test_svcmpeq_s16(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_s32(svbool_t pg, svint32_t op1, svint32_t op2)
+svbool_t test_svcmpeq_s32(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_s32,,)(pg, op1, op2);
 }
@@ -81,7 +89,7 @@ svbool_t test_svcmpeq_s32(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_s64(svbool_t pg, svint64_t op1, svint64_t op2)
+svbool_t test_svcmpeq_s64(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_s64,,)(pg, op1, op2);
 }
@@ -96,7 +104,7 @@ svbool_t test_svcmpeq_s64(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpeq_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svbool_t test_svcmpeq_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_u8,,)(pg, op1, op2);
 }
@@ -115,7 +123,7 @@ svbool_t test_svcmpeq_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svbool_t test_svcmpeq_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_u16,,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svbool_t test_svcmpeq_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svbool_t test_svcmpeq_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_u32,,)(pg, op1, op2);
 }
@@ -153,7 +161,7 @@ svbool_t test_svcmpeq_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svbool_t test_svcmpeq_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_u64,,)(pg, op1, op2);
 }
@@ -176,7 +184,7 @@ svbool_t test_svcmpeq_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
+svbool_t test_svcmpeq_n_s64(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_s64,,)(pg, op1, op2);
 }
@@ -199,7 +207,7 @@ svbool_t test_svcmpeq_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
+svbool_t test_svcmpeq_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_u64,,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svbool_t test_svcmpeq_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpeq_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
+svbool_t test_svcmpeq_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq_wide,_s8,,)(pg, op1, op2);
 }
@@ -233,7 +241,7 @@ svbool_t test_svcmpeq_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
+svbool_t test_svcmpeq_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq_wide,_s16,,)(pg, op1, op2);
 }
@@ -252,7 +260,7 @@ svbool_t test_svcmpeq_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
+svbool_t test_svcmpeq_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq_wide,_s32,,)(pg, op1, op2);
 }
@@ -271,7 +279,7 @@ svbool_t test_svcmpeq_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpeq_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
+svbool_t test_svcmpeq_n_s8(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_s8,,)(pg, op1, op2);
 }
@@ -294,7 +302,7 @@ svbool_t test_svcmpeq_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
+svbool_t test_svcmpeq_n_s16(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_s16,,)(pg, op1, op2);
 }
@@ -317,7 +325,7 @@ svbool_t test_svcmpeq_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
+svbool_t test_svcmpeq_n_s32(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_s32,,)(pg, op1, op2);
 }
@@ -336,7 +344,7 @@ svbool_t test_svcmpeq_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpeq_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
+svbool_t test_svcmpeq_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_u8,,)(pg, op1, op2);
 }
@@ -359,7 +367,7 @@ svbool_t test_svcmpeq_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
+svbool_t test_svcmpeq_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_u16,,)(pg, op1, op2);
 }
@@ -382,7 +390,7 @@ svbool_t test_svcmpeq_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
+svbool_t test_svcmpeq_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_u32,,)(pg, op1, op2);
 }
@@ -401,7 +409,7 @@ svbool_t test_svcmpeq_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svcmpeq_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_f16,,)(pg, op1, op2);
 }
@@ -420,7 +428,7 @@ svbool_t test_svcmpeq_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svcmpeq_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_f32,,)(pg, op1, op2);
 }
@@ -439,7 +447,7 @@ svbool_t test_svcmpeq_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svcmpeq_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_f64,,)(pg, op1, op2);
 }
@@ -462,7 +470,7 @@ svbool_t test_svcmpeq_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
+svbool_t test_svcmpeq_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_f16,,)(pg, op1, op2);
 }
@@ -485,7 +493,7 @@ svbool_t test_svcmpeq_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svcmpeq_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_f32,,)(pg, op1, op2);
 }
@@ -508,7 +516,7 @@ svbool_t test_svcmpeq_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svcmpeq_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq,_n_f64,,)(pg, op1, op2);
 }
@@ -527,7 +535,7 @@ svbool_t test_svcmpeq_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpeq.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpeq_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
+svbool_t test_svcmpeq_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq_wide,_n_s8,,)(pg, op1, op2);
 }
@@ -550,7 +558,7 @@ svbool_t test_svcmpeq_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
+svbool_t test_svcmpeq_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq_wide,_n_s16,,)(pg, op1, op2);
 }
@@ -573,7 +581,7 @@ svbool_t test_svcmpeq_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpeq_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
+svbool_t test_svcmpeq_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpeq_wide,_n_s32,,)(pg, op1, op2);
 }
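
A note on the SVE_ACLE_FUNC macro used throughout these tests: with -DSVE_OVERLOADED_FORMS it keeps only the first and third arguments, selecting the overloaded intrinsic name. The non-overloaded branch falls outside the hunk context shown here; in these files it presumably pastes all four pieces together to form the mangled name. A worked expansion:

    /* Overloaded build (-DSVE_OVERLOADED_FORMS):
         SVE_ACLE_FUNC(svcmpeq,_s8,,)(pg, op1, op2) -> svcmpeq(pg, op1, op2)
       Non-overloaded build (assumed branch, not shown in the diff):
         SVE_ACLE_FUNC(svcmpeq,_s8,,)(pg, op1, op2) -> svcmpeq_s8(pg, op1, op2) */
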
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpge.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpge.c
index 478fb34f8cebf..92d28e064a6d1 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpge.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpge.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpge_s8(svbool_t pg, svint8_t op1, svint8_t op2)
+svbool_t test_svcmpge_s8(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_s8,,)(pg, op1, op2);
 }
@@ -43,7 +51,7 @@ svbool_t test_svcmpge_s8(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_s16(svbool_t pg, svint16_t op1, svint16_t op2)
+svbool_t test_svcmpge_s16(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_s16,,)(pg, op1, op2);
 }
@@ -62,7 +70,7 @@ svbool_t test_svcmpge_s16(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_s32(svbool_t pg, svint32_t op1, svint32_t op2)
+svbool_t test_svcmpge_s32(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_s32,,)(pg, op1, op2);
 }
@@ -81,7 +89,7 @@ svbool_t test_svcmpge_s32(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_s64(svbool_t pg, svint64_t op1, svint64_t op2)
+svbool_t test_svcmpge_s64(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_s64,,)(pg, op1, op2);
 }
@@ -96,7 +104,7 @@ svbool_t test_svcmpge_s64(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpge_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svbool_t test_svcmpge_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_u8,,)(pg, op1, op2);
 }
@@ -115,7 +123,7 @@ svbool_t test_svcmpge_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svbool_t test_svcmpge_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_u16,,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svbool_t test_svcmpge_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svbool_t test_svcmpge_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_u32,,)(pg, op1, op2);
 }
@@ -153,7 +161,7 @@ svbool_t test_svcmpge_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svbool_t test_svcmpge_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_u64,,)(pg, op1, op2);
 }
@@ -176,7 +184,7 @@ svbool_t test_svcmpge_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
+svbool_t test_svcmpge_n_s64(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_s64,,)(pg, op1, op2);
 }
@@ -199,7 +207,7 @@ svbool_t test_svcmpge_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
+svbool_t test_svcmpge_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_u64,,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svbool_t test_svcmpge_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpge_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
+svbool_t test_svcmpge_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_s8,,)(pg, op1, op2);
 }
@@ -233,7 +241,7 @@ svbool_t test_svcmpge_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
+svbool_t test_svcmpge_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_s16,,)(pg, op1, op2);
 }
@@ -252,7 +260,7 @@ svbool_t test_svcmpge_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
+svbool_t test_svcmpge_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_s32,,)(pg, op1, op2);
 }
@@ -267,7 +275,7 @@ svbool_t test_svcmpge_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpge_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svbool_t test_svcmpge_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_u8,,)(pg, op1, op2);
 }
@@ -286,7 +294,7 @@ svbool_t test_svcmpge_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svbool_t test_svcmpge_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_u16,,)(pg, op1, op2);
 }
@@ -305,7 +313,7 @@ svbool_t test_svcmpge_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svbool_t test_svcmpge_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_u32,,)(pg, op1, op2);
 }
@@ -324,7 +332,7 @@ svbool_t test_svcmpge_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpge_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
+svbool_t test_svcmpge_n_s8(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_s8,,)(pg, op1, op2);
 }
@@ -347,7 +355,7 @@ svbool_t test_svcmpge_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
+svbool_t test_svcmpge_n_s16(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_s16,,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svbool_t test_svcmpge_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
+svbool_t test_svcmpge_n_s32(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_s32,,)(pg, op1, op2);
 }
@@ -389,7 +397,7 @@ svbool_t test_svcmpge_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpge_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
+svbool_t test_svcmpge_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_u8,,)(pg, op1, op2);
 }
@@ -412,7 +420,7 @@ svbool_t test_svcmpge_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
+svbool_t test_svcmpge_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_u16,,)(pg, op1, op2);
 }
@@ -435,7 +443,7 @@ svbool_t test_svcmpge_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
+svbool_t test_svcmpge_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_u32,,)(pg, op1, op2);
 }
@@ -454,7 +462,7 @@ svbool_t test_svcmpge_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svcmpge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_f16,,)(pg, op1, op2);
 }
@@ -473,7 +481,7 @@ svbool_t test_svcmpge_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svcmpge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_f32,,)(pg, op1, op2);
 }
@@ -492,7 +500,7 @@ svbool_t test_svcmpge_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svcmpge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_f64,,)(pg, op1, op2);
 }
@@ -515,7 +523,7 @@ svbool_t test_svcmpge_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
+svbool_t test_svcmpge_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_f16,,)(pg, op1, op2);
 }
@@ -538,7 +546,7 @@ svbool_t test_svcmpge_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svcmpge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_f32,,)(pg, op1, op2);
 }
@@ -561,7 +569,7 @@ svbool_t test_svcmpge_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svcmpge_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge,_n_f64,,)(pg, op1, op2);
 }
@@ -580,7 +588,7 @@ svbool_t test_svcmpge_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpge_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
+svbool_t test_svcmpge_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_n_s8,,)(pg, op1, op2);
 }
@@ -603,7 +611,7 @@ svbool_t test_svcmpge_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
+svbool_t test_svcmpge_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_n_s16,,)(pg, op1, op2);
 }
@@ -626,7 +634,7 @@ svbool_t test_svcmpge_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
+svbool_t test_svcmpge_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_n_s32,,)(pg, op1, op2);
 }
@@ -645,7 +653,7 @@ svbool_t test_svcmpge_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpge_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2)
+svbool_t test_svcmpge_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_n_u8,,)(pg, op1, op2);
 }
@@ -668,7 +676,7 @@ svbool_t test_svcmpge_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2)
+svbool_t test_svcmpge_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_n_u16,,)(pg, op1, op2);
 }
@@ -691,7 +699,7 @@ svbool_t test_svcmpge_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpge_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2)
+svbool_t test_svcmpge_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpge_wide,_n_u32,,)(pg, op1, op2);
 }
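
As the CHECK lines in the files around this point show, the unsigned comparisons select the unsigned-compare intrinsics while the signed variants keep the signed forms; the MODE_ATTR change is orthogonal to this lowering. In summary:

    /* Lowering exercised by these tests:
         svcmpge_s8 -> @llvm.aarch64.sve.cmpge.nxv16i8
         svcmpge_u8 -> @llvm.aarch64.sve.cmphs.nxv16i8  (higher-or-same)
         svcmpgt_s8 -> @llvm.aarch64.sve.cmpgt.nxv16i8
         svcmpgt_u8 -> @llvm.aarch64.sve.cmphi.nxv16i8  (higher) */
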
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpgt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpgt.c
index 8721acdfd3fda..8c0c61c32e85c 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpgt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpgt.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpgt_s8(svbool_t pg, svint8_t op1, svint8_t op2)
+svbool_t test_svcmpgt_s8(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_s8,,)(pg, op1, op2);
 }
@@ -43,7 +51,7 @@ svbool_t test_svcmpgt_s8(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_s16(svbool_t pg, svint16_t op1, svint16_t op2)
+svbool_t test_svcmpgt_s16(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_s16,,)(pg, op1, op2);
 }
@@ -62,7 +70,7 @@ svbool_t test_svcmpgt_s16(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_s32(svbool_t pg, svint32_t op1, svint32_t op2)
+svbool_t test_svcmpgt_s32(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_s32,,)(pg, op1, op2);
 }
@@ -81,7 +89,7 @@ svbool_t test_svcmpgt_s32(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_s64(svbool_t pg, svint64_t op1, svint64_t op2)
+svbool_t test_svcmpgt_s64(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_s64,,)(pg, op1, op2);
 }
@@ -96,7 +104,7 @@ svbool_t test_svcmpgt_s64(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpgt_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svbool_t test_svcmpgt_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_u8,,)(pg, op1, op2);
 }
@@ -115,7 +123,7 @@ svbool_t test_svcmpgt_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svbool_t test_svcmpgt_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_u16,,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svbool_t test_svcmpgt_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svbool_t test_svcmpgt_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_u32,,)(pg, op1, op2);
 }
@@ -153,7 +161,7 @@ svbool_t test_svcmpgt_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svbool_t test_svcmpgt_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_u64,,)(pg, op1, op2);
 }
@@ -176,7 +184,7 @@ svbool_t test_svcmpgt_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
+svbool_t test_svcmpgt_n_s64(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_s64,,)(pg, op1, op2);
 }
@@ -199,7 +207,7 @@ svbool_t test_svcmpgt_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
+svbool_t test_svcmpgt_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_u64,,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svbool_t test_svcmpgt_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpgt_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
+svbool_t test_svcmpgt_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_s8,,)(pg, op1, op2);
 }
@@ -233,7 +241,7 @@ svbool_t test_svcmpgt_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
+svbool_t test_svcmpgt_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_s16,,)(pg, op1, op2);
 }
@@ -252,7 +260,7 @@ svbool_t test_svcmpgt_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
+svbool_t test_svcmpgt_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_s32,,)(pg, op1, op2);
 }
@@ -267,7 +275,7 @@ svbool_t test_svcmpgt_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpgt_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svbool_t test_svcmpgt_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_u8,,)(pg, op1, op2);
 }
@@ -286,7 +294,7 @@ svbool_t test_svcmpgt_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svbool_t test_svcmpgt_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_u16,,)(pg, op1, op2);
 }
@@ -305,7 +313,7 @@ svbool_t test_svcmpgt_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svbool_t test_svcmpgt_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_u32,,)(pg, op1, op2);
 }
@@ -324,7 +332,7 @@ svbool_t test_svcmpgt_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpgt_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
+svbool_t test_svcmpgt_n_s8(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_s8,,)(pg, op1, op2);
 }
@@ -347,7 +355,7 @@ svbool_t test_svcmpgt_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
+svbool_t test_svcmpgt_n_s16(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_s16,,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svbool_t test_svcmpgt_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
+svbool_t test_svcmpgt_n_s32(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_s32,,)(pg, op1, op2);
 }
@@ -389,7 +397,7 @@ svbool_t test_svcmpgt_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpgt_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
+svbool_t test_svcmpgt_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_u8,,)(pg, op1, op2);
 }
@@ -412,7 +420,7 @@ svbool_t test_svcmpgt_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
+svbool_t test_svcmpgt_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_u16,,)(pg, op1, op2);
 }
@@ -435,7 +443,7 @@ svbool_t test_svcmpgt_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
+svbool_t test_svcmpgt_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_u32,,)(pg, op1, op2);
 }
@@ -454,7 +462,7 @@ svbool_t test_svcmpgt_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svcmpgt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_f16,,)(pg, op1, op2);
 }
@@ -473,7 +481,7 @@ svbool_t test_svcmpgt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svcmpgt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_f32,,)(pg, op1, op2);
 }
@@ -492,7 +500,7 @@ svbool_t test_svcmpgt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svcmpgt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_f64,,)(pg, op1, op2);
 }
@@ -515,7 +523,7 @@ svbool_t test_svcmpgt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
+svbool_t test_svcmpgt_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_f16,,)(pg, op1, op2);
 }
@@ -538,7 +546,7 @@ svbool_t test_svcmpgt_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svcmpgt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_f32,,)(pg, op1, op2);
 }
@@ -561,7 +569,7 @@ svbool_t test_svcmpgt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svcmpgt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt,_n_f64,,)(pg, op1, op2);
 }
@@ -580,7 +588,7 @@ svbool_t test_svcmpgt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpgt_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
+svbool_t test_svcmpgt_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_n_s8,,)(pg, op1, op2);
 }
@@ -603,7 +611,7 @@ svbool_t test_svcmpgt_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
+svbool_t test_svcmpgt_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_n_s16,,)(pg, op1, op2);
 }
@@ -626,7 +634,7 @@ svbool_t test_svcmpgt_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
+svbool_t test_svcmpgt_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_n_s32,,)(pg, op1, op2);
 }
@@ -645,7 +653,7 @@ svbool_t test_svcmpgt_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpgt_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2)
+svbool_t test_svcmpgt_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_n_u8,,)(pg, op1, op2);
 }
@@ -668,7 +676,7 @@ svbool_t test_svcmpgt_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2)
+svbool_t test_svcmpgt_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_n_u16,,)(pg, op1, op2);
 }
@@ -691,7 +699,7 @@ svbool_t test_svcmpgt_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpgt_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2)
+svbool_t test_svcmpgt_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpgt_wide,_n_u32,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmple.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmple.c
index 688ea57cc7323..b982ed16d213a 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmple.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmple.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmple_s8(svbool_t pg, svint8_t op1, svint8_t op2)
+svbool_t test_svcmple_s8(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_s8,,)(pg, op1, op2);
 }
@@ -43,7 +51,7 @@ svbool_t test_svcmple_s8(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_s16(svbool_t pg, svint16_t op1, svint16_t op2)
+svbool_t test_svcmple_s16(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_s16,,)(pg, op1, op2);
 }
@@ -62,7 +70,7 @@ svbool_t test_svcmple_s16(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_s32(svbool_t pg, svint32_t op1, svint32_t op2)
+svbool_t test_svcmple_s32(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_s32,,)(pg, op1, op2);
 }
@@ -81,7 +89,7 @@ svbool_t test_svcmple_s32(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_s64(svbool_t pg, svint64_t op1, svint64_t op2)
+svbool_t test_svcmple_s64(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_s64,,)(pg, op1, op2);
 }
@@ -96,7 +104,7 @@ svbool_t test_svcmple_s64(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmple_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svbool_t test_svcmple_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_u8,,)(pg, op1, op2);
 }
@@ -115,7 +123,7 @@ svbool_t test_svcmple_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svbool_t test_svcmple_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_u16,,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svbool_t test_svcmple_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svbool_t test_svcmple_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_u32,,)(pg, op1, op2);
 }
@@ -153,7 +161,7 @@ svbool_t test_svcmple_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svbool_t test_svcmple_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_u64,,)(pg, op1, op2);
 }
@@ -176,7 +184,7 @@ svbool_t test_svcmple_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
+svbool_t test_svcmple_n_s64(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_s64,,)(pg, op1, op2);
 }
@@ -199,7 +207,7 @@ svbool_t test_svcmple_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
+svbool_t test_svcmple_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_u64,,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svbool_t test_svcmple_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmple_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
+svbool_t test_svcmple_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_s8,,)(pg, op1, op2);
 }
@@ -233,7 +241,7 @@ svbool_t test_svcmple_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
+svbool_t test_svcmple_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_s16,,)(pg, op1, op2);
 }
@@ -252,7 +260,7 @@ svbool_t test_svcmple_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
+svbool_t test_svcmple_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_s32,,)(pg, op1, op2);
 }
@@ -267,7 +275,7 @@ svbool_t test_svcmple_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmple_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svbool_t test_svcmple_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_u8,,)(pg, op1, op2);
 }
@@ -286,7 +294,7 @@ svbool_t test_svcmple_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svbool_t test_svcmple_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_u16,,)(pg, op1, op2);
 }
@@ -305,7 +313,7 @@ svbool_t test_svcmple_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svbool_t test_svcmple_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_u32,,)(pg, op1, op2);
 }
@@ -324,7 +332,7 @@ svbool_t test_svcmple_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpge.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmple_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
+svbool_t test_svcmple_n_s8(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_s8,,)(pg, op1, op2);
 }
@@ -347,7 +355,7 @@ svbool_t test_svcmple_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
+svbool_t test_svcmple_n_s16(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_s16,,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svbool_t test_svcmple_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
+svbool_t test_svcmple_n_s32(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_s32,,)(pg, op1, op2);
 }
@@ -389,7 +397,7 @@ svbool_t test_svcmple_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphs.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmple_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
+svbool_t test_svcmple_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_u8,,)(pg, op1, op2);
 }
@@ -412,7 +420,7 @@ svbool_t test_svcmple_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
+svbool_t test_svcmple_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_u16,,)(pg, op1, op2);
 }
@@ -435,7 +443,7 @@ svbool_t test_svcmple_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
+svbool_t test_svcmple_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_u32,,)(pg, op1, op2);
 }
@@ -454,7 +462,7 @@ svbool_t test_svcmple_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svcmple_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_f16,,)(pg, op1, op2);
 }
@@ -473,7 +481,7 @@ svbool_t test_svcmple_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svcmple_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_f32,,)(pg, op1, op2);
 }
@@ -492,7 +500,7 @@ svbool_t test_svcmple_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svcmple_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_f64,,)(pg, op1, op2);
 }
@@ -515,7 +523,7 @@ svbool_t test_svcmple_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
+svbool_t test_svcmple_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_f16,,)(pg, op1, op2);
 }
@@ -538,7 +546,7 @@ svbool_t test_svcmple_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svcmple_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_f32,,)(pg, op1, op2);
 }
@@ -561,7 +569,7 @@ svbool_t test_svcmple_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svcmple_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple,_n_f64,,)(pg, op1, op2);
 }
@@ -580,7 +588,7 @@ svbool_t test_svcmple_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmple.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmple_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
+svbool_t test_svcmple_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_n_s8,,)(pg, op1, op2);
 }
@@ -603,7 +611,7 @@ svbool_t test_svcmple_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
+svbool_t test_svcmple_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_n_s16,,)(pg, op1, op2);
 }
@@ -626,7 +634,7 @@ svbool_t test_svcmple_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
+svbool_t test_svcmple_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_n_s32,,)(pg, op1, op2);
 }
@@ -645,7 +653,7 @@ svbool_t test_svcmple_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpls.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmple_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2)
+svbool_t test_svcmple_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_n_u8,,)(pg, op1, op2);
 }
@@ -668,7 +676,7 @@ svbool_t test_svcmple_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2)
+svbool_t test_svcmple_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_n_u16,,)(pg, op1, op2);
 }
@@ -691,7 +699,7 @@ svbool_t test_svcmple_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmple_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2)
+svbool_t test_svcmple_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmple_wide,_n_u32,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmplt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmplt.c
index 5919ba72a3901..38612a10a8091 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmplt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmplt.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmplt_s8(svbool_t pg, svint8_t op1, svint8_t op2)
+svbool_t test_svcmplt_s8(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_s8,,)(pg, op1, op2);
 }
@@ -43,7 +51,7 @@ svbool_t test_svcmplt_s8(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_s16(svbool_t pg, svint16_t op1, svint16_t op2)
+svbool_t test_svcmplt_s16(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_s16,,)(pg, op1, op2);
 }
@@ -62,7 +70,7 @@ svbool_t test_svcmplt_s16(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_s32(svbool_t pg, svint32_t op1, svint32_t op2)
+svbool_t test_svcmplt_s32(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_s32,,)(pg, op1, op2);
 }
@@ -81,7 +89,7 @@ svbool_t test_svcmplt_s32(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_s64(svbool_t pg, svint64_t op1, svint64_t op2)
+svbool_t test_svcmplt_s64(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_s64,,)(pg, op1, op2);
 }
@@ -96,7 +104,7 @@ svbool_t test_svcmplt_s64(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmplt_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svbool_t test_svcmplt_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_u8,,)(pg, op1, op2);
 }
@@ -115,7 +123,7 @@ svbool_t test_svcmplt_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svbool_t test_svcmplt_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_u16,,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svbool_t test_svcmplt_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svbool_t test_svcmplt_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_u32,,)(pg, op1, op2);
 }
@@ -153,7 +161,7 @@ svbool_t test_svcmplt_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svbool_t test_svcmplt_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_u64,,)(pg, op1, op2);
 }
@@ -176,7 +184,7 @@ svbool_t test_svcmplt_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
+svbool_t test_svcmplt_n_s64(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_s64,,)(pg, op1, op2);
 }
@@ -199,7 +207,7 @@ svbool_t test_svcmplt_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
+svbool_t test_svcmplt_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_u64,,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svbool_t test_svcmplt_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmplt_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
+svbool_t test_svcmplt_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_s8,,)(pg, op1, op2);
 }
@@ -233,7 +241,7 @@ svbool_t test_svcmplt_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
+svbool_t test_svcmplt_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_s16,,)(pg, op1, op2);
 }
@@ -252,7 +260,7 @@ svbool_t test_svcmplt_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
+svbool_t test_svcmplt_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_s32,,)(pg, op1, op2);
 }
@@ -267,7 +275,7 @@ svbool_t test_svcmplt_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmplt_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svbool_t test_svcmplt_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_u8,,)(pg, op1, op2);
 }
@@ -286,7 +294,7 @@ svbool_t test_svcmplt_wide_u8(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svbool_t test_svcmplt_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_u16,,)(pg, op1, op2);
 }
@@ -305,7 +313,7 @@ svbool_t test_svcmplt_wide_u16(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svbool_t test_svcmplt_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_u32,,)(pg, op1, op2);
 }
@@ -324,7 +332,7 @@ svbool_t test_svcmplt_wide_u32(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpgt.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmplt_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
+svbool_t test_svcmplt_n_s8(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_s8,,)(pg, op1, op2);
 }
@@ -347,7 +355,7 @@ svbool_t test_svcmplt_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
+svbool_t test_svcmplt_n_s16(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_s16,,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svbool_t test_svcmplt_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
+svbool_t test_svcmplt_n_s32(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_s32,,)(pg, op1, op2);
 }
@@ -389,7 +397,7 @@ svbool_t test_svcmplt_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmphi.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmplt_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
+svbool_t test_svcmplt_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_u8,,)(pg, op1, op2);
 }
@@ -412,7 +420,7 @@ svbool_t test_svcmplt_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
+svbool_t test_svcmplt_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_u16,,)(pg, op1, op2);
 }
@@ -435,7 +443,7 @@ svbool_t test_svcmplt_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
+svbool_t test_svcmplt_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_u32,,)(pg, op1, op2);
 }
@@ -454,7 +462,7 @@ svbool_t test_svcmplt_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svcmplt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_f16,,)(pg, op1, op2);
 }
@@ -473,7 +481,7 @@ svbool_t test_svcmplt_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svcmplt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_f32,,)(pg, op1, op2);
 }
@@ -492,7 +500,7 @@ svbool_t test_svcmplt_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svcmplt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_f64,,)(pg, op1, op2);
 }
@@ -515,7 +523,7 @@ svbool_t test_svcmplt_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
+svbool_t test_svcmplt_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_f16,,)(pg, op1, op2);
 }
@@ -538,7 +546,7 @@ svbool_t test_svcmplt_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svcmplt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_f32,,)(pg, op1, op2);
 }
@@ -561,7 +569,7 @@ svbool_t test_svcmplt_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svcmplt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt,_n_f64,,)(pg, op1, op2);
 }
@@ -580,7 +588,7 @@ svbool_t test_svcmplt_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplt.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmplt_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
+svbool_t test_svcmplt_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_n_s8,,)(pg, op1, op2);
 }
@@ -603,7 +611,7 @@ svbool_t test_svcmplt_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
+svbool_t test_svcmplt_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_n_s16,,)(pg, op1, op2);
 }
@@ -626,7 +634,7 @@ svbool_t test_svcmplt_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
+svbool_t test_svcmplt_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_n_s32,,)(pg, op1, op2);
 }
@@ -645,7 +653,7 @@ svbool_t test_svcmplt_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmplo.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmplt_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2)
+svbool_t test_svcmplt_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_n_u8,,)(pg, op1, op2);
 }
@@ -668,7 +676,7 @@ svbool_t test_svcmplt_wide_n_u8(svbool_t pg, svuint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2)
+svbool_t test_svcmplt_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_n_u16,,)(pg, op1, op2);
 }
@@ -691,7 +699,7 @@ svbool_t test_svcmplt_wide_n_u16(svbool_t pg, svuint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmplt_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2)
+svbool_t test_svcmplt_wide_n_u32(svbool_t pg, svuint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmplt_wide,_n_u32,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpne.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpne.c
index 5cedc1c71e3b3..d41541ca8cc0e 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpne.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpne.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpne_s8(svbool_t pg, svint8_t op1, svint8_t op2)
+svbool_t test_svcmpne_s8(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_s8,,)(pg, op1, op2);
 }
@@ -43,7 +51,7 @@ svbool_t test_svcmpne_s8(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_s16(svbool_t pg, svint16_t op1, svint16_t op2)
+svbool_t test_svcmpne_s16(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_s16,,)(pg, op1, op2);
 }
@@ -62,7 +70,7 @@ svbool_t test_svcmpne_s16(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_s32(svbool_t pg, svint32_t op1, svint32_t op2)
+svbool_t test_svcmpne_s32(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_s32,,)(pg, op1, op2);
 }
@@ -81,7 +89,7 @@ svbool_t test_svcmpne_s32(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_s64(svbool_t pg, svint64_t op1, svint64_t op2)
+svbool_t test_svcmpne_s64(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_s64,,)(pg, op1, op2);
 }
@@ -96,7 +104,7 @@ svbool_t test_svcmpne_s64(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpne_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svbool_t test_svcmpne_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_u8,,)(pg, op1, op2);
 }
@@ -115,7 +123,7 @@ svbool_t test_svcmpne_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svbool_t test_svcmpne_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_u16,,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svbool_t test_svcmpne_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svbool_t test_svcmpne_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_u32,,)(pg, op1, op2);
 }
@@ -153,7 +161,7 @@ svbool_t test_svcmpne_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svbool_t test_svcmpne_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_u64,,)(pg, op1, op2);
 }
@@ -176,7 +184,7 @@ svbool_t test_svcmpne_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
+svbool_t test_svcmpne_n_s64(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_s64,,)(pg, op1, op2);
 }
@@ -199,7 +207,7 @@ svbool_t test_svcmpne_n_s64(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
+svbool_t test_svcmpne_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_u64,,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svbool_t test_svcmpne_n_u64(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpne_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
+svbool_t test_svcmpne_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne_wide,_s8,,)(pg, op1, op2);
 }
@@ -233,7 +241,7 @@ svbool_t test_svcmpne_wide_s8(svbool_t pg, svint8_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
+svbool_t test_svcmpne_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne_wide,_s16,,)(pg, op1, op2);
 }
@@ -252,7 +260,7 @@ svbool_t test_svcmpne_wide_s16(svbool_t pg, svint16_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
+svbool_t test_svcmpne_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne_wide,_s32,,)(pg, op1, op2);
 }
@@ -271,7 +279,7 @@ svbool_t test_svcmpne_wide_s32(svbool_t pg, svint32_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpne_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
+svbool_t test_svcmpne_n_s8(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_s8,,)(pg, op1, op2);
 }
@@ -294,7 +302,7 @@ svbool_t test_svcmpne_n_s8(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
+svbool_t test_svcmpne_n_s16(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_s16,,)(pg, op1, op2);
 }
@@ -317,7 +325,7 @@ svbool_t test_svcmpne_n_s16(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
+svbool_t test_svcmpne_n_s32(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_s32,,)(pg, op1, op2);
 }
@@ -336,7 +344,7 @@ svbool_t test_svcmpne_n_s32(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpne_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
+svbool_t test_svcmpne_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_u8,,)(pg, op1, op2);
 }
@@ -359,7 +367,7 @@ svbool_t test_svcmpne_n_u8(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
+svbool_t test_svcmpne_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_u16,,)(pg, op1, op2);
 }
@@ -382,7 +390,7 @@ svbool_t test_svcmpne_n_u16(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
+svbool_t test_svcmpne_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_u32,,)(pg, op1, op2);
 }
@@ -401,7 +409,7 @@ svbool_t test_svcmpne_n_u32(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svcmpne_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_f16,,)(pg, op1, op2);
 }
@@ -420,7 +428,7 @@ svbool_t test_svcmpne_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svcmpne_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_f32,,)(pg, op1, op2);
 }
@@ -439,7 +447,7 @@ svbool_t test_svcmpne_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svcmpne_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_f64,,)(pg, op1, op2);
 }
@@ -462,7 +470,7 @@ svbool_t test_svcmpne_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
+svbool_t test_svcmpne_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_f16,,)(pg, op1, op2);
 }
@@ -485,7 +493,7 @@ svbool_t test_svcmpne_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svcmpne_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_f32,,)(pg, op1, op2);
 }
@@ -508,7 +516,7 @@ svbool_t test_svcmpne_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svcmpne_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne,_n_f64,,)(pg, op1, op2);
 }
@@ -527,7 +535,7 @@ svbool_t test_svcmpne_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svcmpne_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
+svbool_t test_svcmpne_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne_wide,_n_s8,,)(pg, op1, op2);
 }
@@ -550,7 +558,7 @@ svbool_t test_svcmpne_wide_n_s8(svbool_t pg, svint8_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
+svbool_t test_svcmpne_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne_wide,_n_s16,,)(pg, op1, op2);
 }
@@ -573,7 +581,7 @@ svbool_t test_svcmpne_wide_n_s16(svbool_t pg, svint16_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpne_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2)
+svbool_t test_svcmpne_wide_n_s32(svbool_t pg, svint32_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpne_wide,_n_s32,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpuo.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpuo.c
index 98f0e04429369..02014b099d6f5 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpuo.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cmpuo.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpuo_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svbool_t test_svcmpuo_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpuo,_f16,,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svbool_t test_svcmpuo_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpuo_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svbool_t test_svcmpuo_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpuo,_f32,,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svbool_t test_svcmpuo_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpuo_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svbool_t test_svcmpuo_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpuo,_f64,,)(pg, op1, op2);
 }
@@ -89,7 +97,7 @@ svbool_t test_svcmpuo_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpuo_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
+svbool_t test_svcmpuo_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpuo,_n_f16,,)(pg, op1, op2);
 }
@@ -112,7 +120,7 @@ svbool_t test_svcmpuo_n_f16(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpuo_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
+svbool_t test_svcmpuo_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpuo,_n_f32,,)(pg, op1, op2);
 }
@@ -135,7 +143,7 @@ svbool_t test_svcmpuo_n_f32(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
 //
-svbool_t test_svcmpuo_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2)
+svbool_t test_svcmpuo_n_f64(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcmpuo,_n_f64,,)(pg, op1, op2);
 }
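
The hunks above establish the pattern that every test file in this patch repeats: one extra RUN line compiles the same source with +sme instead of +sve, and the MODE_ATTR macro expands to __arm_streaming in that configuration, so each test function executes in streaming mode, where these intrinsics remain legal. A minimal sketch of the user-facing effect being guarded (hypothetical function name; compiled with -target-feature +sme and no +sve):

#include <arm_sve.h>

// Legal with only +sme because the enclosing function is streaming;
// svcmpuo_f32 is one of the streaming-compatible intrinsics tested above.
svbool_t any_unordered(svbool_t pg, svfloat32_t a, svfloat32_t b) __arm_streaming {
  return svcmpuo_f32(pg, a, b);
}
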
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c
index eb80d68eeae30..367fda709d82b 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnot.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svcnot_s8_z(svbool_t pg, svint8_t op)
+svint8_t test_svcnot_s8_z(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s8,_z,)(pg, op);
 }
@@ -41,7 +49,7 @@ svint8_t test_svcnot_s8_z(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svcnot_s16_z(svbool_t pg, svint16_t op)
+svint16_t test_svcnot_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s16,_z,)(pg, op);
 }
@@ -58,7 +66,7 @@ svint16_t test_svcnot_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcnot_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svcnot_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s32,_z,)(pg, op);
 }
@@ -75,7 +83,7 @@ svint32_t test_svcnot_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcnot_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svcnot_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s64,_z,)(pg, op);
 }
@@ -90,7 +98,7 @@ svint64_t test_svcnot_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnot_u8_z(svbool_t pg, svuint8_t op)
+svuint8_t test_svcnot_u8_z(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u8,_z,)(pg, op);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svcnot_u8_z(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnot_u16_z(svbool_t pg, svuint16_t op)
+svuint16_t test_svcnot_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u16,_z,)(pg, op);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svcnot_u16_z(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnot_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svcnot_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u32,_z,)(pg, op);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svcnot_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnot_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svcnot_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u64,_z,)(pg, op);
 }
@@ -156,7 +164,7 @@ svuint64_t test_svcnot_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svcnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
+svint8_t test_svcnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s8,_m,)(inactive, pg, op);
 }
@@ -173,7 +181,7 @@ svint8_t test_svcnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svcnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
+svint16_t test_svcnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s16,_m,)(inactive, pg, op);
 }
@@ -190,7 +198,7 @@ svint16_t test_svcnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svcnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s32,_m,)(inactive, pg, op);
 }
@@ -207,7 +215,7 @@ svint32_t test_svcnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svcnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s64,_m,)(inactive, pg, op);
 }
@@ -222,7 +230,7 @@ svint64_t test_svcnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
+svuint8_t test_svcnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u8,_m,)(inactive, pg, op);
 }
@@ -239,7 +247,7 @@ svuint8_t test_svcnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
+svuint16_t test_svcnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u16,_m,)(inactive, pg, op);
 }
@@ -256,7 +264,7 @@ svuint16_t test_svcnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svcnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u32,_m,)(inactive, pg, op);
 }
@@ -273,7 +281,7 @@ svuint32_t test_svcnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svcnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u64,_m,)(inactive, pg, op);
 }
@@ -288,7 +296,7 @@ svuint64_t test_svcnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svcnot_s8_x(svbool_t pg, svint8_t op)
+svint8_t test_svcnot_s8_x(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s8,_x,)(pg, op);
 }
@@ -305,7 +313,7 @@ svint8_t test_svcnot_s8_x(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svcnot_s16_x(svbool_t pg, svint16_t op)
+svint16_t test_svcnot_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s16,_x,)(pg, op);
 }
@@ -322,7 +330,7 @@ svint16_t test_svcnot_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcnot_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svcnot_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s32,_x,)(pg, op);
 }
@@ -339,7 +347,7 @@ svint32_t test_svcnot_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcnot_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svcnot_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_s64,_x,)(pg, op);
 }
@@ -354,7 +362,7 @@ svint64_t test_svcnot_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnot.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnot_u8_x(svbool_t pg, svuint8_t op)
+svuint8_t test_svcnot_u8_x(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u8,_x,)(pg, op);
 }
@@ -371,7 +379,7 @@ svuint8_t test_svcnot_u8_x(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnot.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnot_u16_x(svbool_t pg, svuint16_t op)
+svuint16_t test_svcnot_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u16,_x,)(pg, op);
 }
@@ -388,7 +396,7 @@ svuint16_t test_svcnot_u16_x(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnot_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svcnot_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u32,_x,)(pg, op);
 }
@@ -405,7 +413,7 @@ svuint32_t test_svcnot_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnot.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnot_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svcnot_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnot,_u64,_x,)(pg, op);
 }
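
The cnot hunks also show how the three predication suffixes map onto the first intrinsic operand in the CHECK lines: _z passes zeroinitializer, _m passes the inactive vector, and _x passes undef. A short sketch of the same triple in user code (hypothetical function names, a streaming caller as in the updated tests):

#include <arm_sve.h>

// svcnot: per active lane, 1 if the source lane is zero, else 0.
svint32_t lnot_z(svbool_t pg, svint32_t x) __arm_streaming {
  return svcnot_s32_z(pg, x);        // inactive lanes become zero
}
svint32_t lnot_m(svint32_t fb, svbool_t pg, svint32_t x) __arm_streaming {
  return svcnot_s32_m(fb, pg, x);    // inactive lanes come from fb
}
svint32_t lnot_x(svbool_t pg, svint32_t x) __arm_streaming {
  return svcnot_s32_x(pg, x);        // inactive lanes are unspecified
}
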
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c
index 0d164fe76393b..9c28182166702 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_bf16_z(svbool_t pg, svbfloat16_t op) {
+svuint16_t test_svcnt_bf16_z(svbool_t pg, svbfloat16_t op) MODE_ATTR {
 // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_z'}}
   return SVE_ACLE_FUNC(svcnt, _bf16, _z, )(pg, op);
 }
@@ -44,7 +52,7 @@ svuint16_t test_svcnt_bf16_z(svbool_t pg, svbfloat16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) {
+svuint16_t test_svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) MODE_ATTR {
 // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_m'}}
   return SVE_ACLE_FUNC(svcnt, _bf16, _m, )(inactive, pg, op);
 }
@@ -60,7 +68,7 @@ svuint16_t test_svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_bf16_x(svbool_t pg, svbfloat16_t op) {
+svuint16_t test_svcnt_bf16_x(svbool_t pg, svbfloat16_t op) MODE_ATTR {
 // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_x'}}
   return SVE_ACLE_FUNC(svcnt, _bf16, _x, )(pg, op);
 }
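
For the bfloat tests the SVE_ACLE_FUNC indirection matters: with SVE_OVERLOADED_FORMS defined the macro pastes A1##A3, so the call resolves to the short overloaded name; the non-overloaded branch is outside these hunks, but by the same token-pasting scheme it would yield the fully suffixed name. A hedged expansion of the call sites above (assumes +bf16 is enabled, as in the RUN lines):

#include <arm_sve.h>

#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
svuint16_t popcount_bf16(svbool_t pg, svbfloat16_t op) __arm_streaming {
  // Pastes to the overloaded svcnt_z(pg, op); a non-overloaded build of the
  // test would instead call svcnt_bf16_z(pg, op).
  return SVE_ACLE_FUNC(svcnt, _bf16, _z, )(pg, op);
}
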
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c
index 45ccccb2fba18..fe545adbd6a10 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnt_s8_z(svbool_t pg, svint8_t op)
+svuint8_t test_svcnt_s8_z(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s8,_z,)(pg, op);
 }
@@ -41,7 +49,7 @@ svuint8_t test_svcnt_s8_z(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_s16_z(svbool_t pg, svint16_t op)
+svuint16_t test_svcnt_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s16,_z,)(pg, op);
 }
@@ -58,7 +66,7 @@ svuint16_t test_svcnt_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_s32_z(svbool_t pg, svint32_t op)
+svuint32_t test_svcnt_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s32,_z,)(pg, op);
 }
@@ -75,7 +83,7 @@ svuint32_t test_svcnt_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_s64_z(svbool_t pg, svint64_t op)
+svuint64_t test_svcnt_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s64,_z,)(pg, op);
 }
@@ -90,7 +98,7 @@ svuint64_t test_svcnt_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnt_u8_z(svbool_t pg, svuint8_t op)
+svuint8_t test_svcnt_u8_z(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u8,_z,)(pg, op);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svcnt_u8_z(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_u16_z(svbool_t pg, svuint16_t op)
+svuint16_t test_svcnt_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u16,_z,)(pg, op);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svcnt_u16_z(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svcnt_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u32,_z,)(pg, op);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svcnt_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svcnt_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u64,_z,)(pg, op);
 }
@@ -158,7 +166,7 @@ svuint64_t test_svcnt_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_f16_z(svbool_t pg, svfloat16_t op)
+svuint16_t test_svcnt_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f16,_z,)(pg, op);
 }
@@ -175,7 +183,7 @@ svuint16_t test_svcnt_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_f32_z(svbool_t pg, svfloat32_t op)
+svuint32_t test_svcnt_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f32,_z,)(pg, op);
 }
@@ -192,7 +200,7 @@ svuint32_t test_svcnt_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_f64_z(svbool_t pg, svfloat64_t op)
+svuint64_t test_svcnt_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f64,_z,)(pg, op);
 }
@@ -207,7 +215,7 @@ svuint64_t test_svcnt_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnt_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op)
+svuint8_t test_svcnt_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s8,_m,)(inactive, pg, op);
 }
@@ -224,7 +232,7 @@ svuint8_t test_svcnt_s8_m(svuint8_t inactive, svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op)
+svuint16_t test_svcnt_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s16,_m,)(inactive, pg, op);
 }
@@ -241,7 +249,7 @@ svuint16_t test_svcnt_s16_m(svuint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op)
+svuint32_t test_svcnt_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s32,_m,)(inactive, pg, op);
 }
@@ -258,7 +266,7 @@ svuint32_t test_svcnt_s32_m(svuint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op)
+svuint64_t test_svcnt_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s64,_m,)(inactive, pg, op);
 }
@@ -273,7 +281,7 @@ svuint64_t test_svcnt_s64_m(svuint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnt_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
+svuint8_t test_svcnt_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u8,_m,)(inactive, pg, op);
 }
@@ -290,7 +298,7 @@ svuint8_t test_svcnt_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
+svuint16_t test_svcnt_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u16,_m,)(inactive, pg, op);
 }
@@ -307,7 +315,7 @@ svuint16_t test_svcnt_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svcnt_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u32,_m,)(inactive, pg, op);
 }
@@ -324,7 +332,7 @@ svuint32_t test_svcnt_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svcnt_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u64,_m,)(inactive, pg, op);
 }
@@ -341,7 +349,7 @@ svuint64_t test_svcnt_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op)
+svuint16_t test_svcnt_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f16,_m,)(inactive, pg, op);
 }
@@ -358,7 +366,7 @@ svuint16_t test_svcnt_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op)
+svuint32_t test_svcnt_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f32,_m,)(inactive, pg, op);
 }
@@ -375,7 +383,7 @@ svuint32_t test_svcnt_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op)
+svuint64_t test_svcnt_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f64,_m,)(inactive, pg, op);
 }
@@ -390,7 +398,7 @@ svuint64_t test_svcnt_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnt_s8_x(svbool_t pg, svint8_t op)
+svuint8_t test_svcnt_s8_x(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s8,_x,)(pg, op);
 }
@@ -407,7 +415,7 @@ svuint8_t test_svcnt_s8_x(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_s16_x(svbool_t pg, svint16_t op)
+svuint16_t test_svcnt_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s16,_x,)(pg, op);
 }
@@ -424,7 +432,7 @@ svuint16_t test_svcnt_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_s32_x(svbool_t pg, svint32_t op)
+svuint32_t test_svcnt_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s32,_x,)(pg, op);
 }
@@ -441,7 +449,7 @@ svuint32_t test_svcnt_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_s64_x(svbool_t pg, svint64_t op)
+svuint64_t test_svcnt_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_s64,_x,)(pg, op);
 }
@@ -456,7 +464,7 @@ svuint64_t test_svcnt_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.cnt.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svcnt_u8_x(svbool_t pg, svuint8_t op)
+svuint8_t test_svcnt_u8_x(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u8,_x,)(pg, op);
 }
@@ -473,7 +481,7 @@ svuint8_t test_svcnt_u8_x(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_u16_x(svbool_t pg, svuint16_t op)
+svuint16_t test_svcnt_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u16,_x,)(pg, op);
 }
@@ -490,7 +498,7 @@ svuint16_t test_svcnt_u16_x(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svcnt_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u32,_x,)(pg, op);
 }
@@ -507,7 +515,7 @@ svuint32_t test_svcnt_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svcnt_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_u64,_x,)(pg, op);
 }
@@ -524,7 +532,7 @@ svuint64_t test_svcnt_u64_x(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcnt_f16_x(svbool_t pg, svfloat16_t op)
+svuint16_t test_svcnt_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f16,_x,)(pg, op);
 }
@@ -541,7 +549,7 @@ svuint16_t test_svcnt_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcnt_f32_x(svbool_t pg, svfloat32_t op)
+svuint32_t test_svcnt_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f32,_x,)(pg, op);
 }
@@ -558,7 +566,7 @@ svuint32_t test_svcnt_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcnt_f64_x(svbool_t pg, svfloat64_t op)
+svuint64_t test_svcnt_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcnt,_f64,_x,)(pg, op);
 }
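
Note that svcnt (unlike svcntb/h/w/d below) is a per-element population count: each active lane of the result holds the number of set bits in the corresponding source lane, and the result is always the unsigned integer type of matching width, which is why the float forms above return svuint16_t/svuint32_t/svuint64_t. A small hypothetical use, legal in streaming code under this patch:

#include <arm_sve.h>

// Per-lane popcount; a lane holding 0x0000000F yields 4.
svuint32_t lane_popcount(svbool_t pg, svuint32_t x) __arm_streaming {
  return svcnt_u32_z(pg, x);   // inactive lanes zeroed
}
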
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c
index 70a9360f9a32b..3623f50bbd5ba 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntb.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svcntb(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
@@ -17,7 +25,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svcntb()
+uint64_t test_svcntb(void) MODE_ATTR
 {
   return svcntb();
 }
@@ -32,7 +40,7 @@ uint64_t test_svcntb()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntb(i32 0)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntb_pat()
+uint64_t test_svcntb_pat(void) MODE_ATTR
 {
   return svcntb_pat(SV_POW2);
 }
@@ -45,7 +53,7 @@ uint64_t test_svcntb_pat()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 1
 //
-uint64_t test_svcntb_pat_1()
+uint64_t test_svcntb_pat_1(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL1);
 }
@@ -58,7 +66,7 @@ uint64_t test_svcntb_pat_1()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 2
 //
-uint64_t test_svcntb_pat_2()
+uint64_t test_svcntb_pat_2(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL2);
 }
@@ -71,7 +79,7 @@ uint64_t test_svcntb_pat_2()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 3
 //
-uint64_t test_svcntb_pat_3()
+uint64_t test_svcntb_pat_3(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL3);
 }
@@ -84,7 +92,7 @@ uint64_t test_svcntb_pat_3()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 4
 //
-uint64_t test_svcntb_pat_4()
+uint64_t test_svcntb_pat_4(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL4);
 }
@@ -97,7 +105,7 @@ uint64_t test_svcntb_pat_4()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 5
 //
-uint64_t test_svcntb_pat_5()
+uint64_t test_svcntb_pat_5(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL5);
 }
@@ -110,7 +118,7 @@ uint64_t test_svcntb_pat_5()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 6
 //
-uint64_t test_svcntb_pat_6()
+uint64_t test_svcntb_pat_6(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL6);
 }
@@ -123,7 +131,7 @@ uint64_t test_svcntb_pat_6()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 7
 //
-uint64_t test_svcntb_pat_7()
+uint64_t test_svcntb_pat_7(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL7);
 }
@@ -136,7 +144,7 @@ uint64_t test_svcntb_pat_7()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 8
 //
-uint64_t test_svcntb_pat_8()
+uint64_t test_svcntb_pat_8(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL8);
 }
@@ -149,7 +157,7 @@ uint64_t test_svcntb_pat_8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 16
 //
-uint64_t test_svcntb_pat_9()
+uint64_t test_svcntb_pat_9(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL16);
 }
@@ -164,7 +172,7 @@ uint64_t test_svcntb_pat_9()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntb(i32 10)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntb_pat_10()
+uint64_t test_svcntb_pat_10(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL32);
 }
@@ -179,7 +187,7 @@ uint64_t test_svcntb_pat_10()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntb(i32 11)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntb_pat_11()
+uint64_t test_svcntb_pat_11(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL64);
 }
@@ -194,7 +202,7 @@ uint64_t test_svcntb_pat_11()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntb(i32 12)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntb_pat_12()
+uint64_t test_svcntb_pat_12(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL128);
 }
@@ -209,7 +217,7 @@ uint64_t test_svcntb_pat_12()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntb(i32 13)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntb_pat_13()
+uint64_t test_svcntb_pat_13(void) MODE_ATTR
 {
   return svcntb_pat(SV_VL256);
 }
@@ -224,7 +232,7 @@ uint64_t test_svcntb_pat_13()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntb(i32 29)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntb_pat_14()
+uint64_t test_svcntb_pat_14(void) MODE_ATTR
 {
   return svcntb_pat(SV_MUL4);
 }
@@ -239,7 +247,7 @@ uint64_t test_svcntb_pat_14()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntb(i32 30)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntb_pat_15()
+uint64_t test_svcntb_pat_15(void) MODE_ATTR
 {
   return svcntb_pat(SV_MUL3);
 }
@@ -256,7 +264,7 @@ uint64_t test_svcntb_pat_15()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svcntb_pat_16()
+uint64_t test_svcntb_pat_16(void) MODE_ATTR
 {
   return svcntb_pat(SV_ALL);
 }
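
The cntb checks also document the constant-folding story: svcntb() is 16 * vscale (the shl by 4), and svcntb_pat folds SV_VL1..SV_VL8 and SV_VL16 to literal constants because the 128-bit minimum vector always holds at least 16 bytes, while SV_VL32 and up, SV_MUL3/SV_MUL4, and SV_POW2 remain calls to the cntb intrinsic since they depend on the runtime vector length. A hypothetical helper showing the scalable count in ordinary arithmetic:

#include <arm_sve.h>
#include <stdint.h>

// Bytes spanned by nvec whole SVE vectors; svcntb() == 16 * vscale.
uint64_t bytes_for(uint64_t nvec) __arm_streaming {
  return nvec * svcntb();
}
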
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c
index 5b05fca3c78a2..b3e6dcf540627 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntd.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svcntd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
@@ -17,7 +25,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svcntd()
+uint64_t test_svcntd(void) MODE_ATTR
 {
   return svcntd();
 }
@@ -32,7 +40,7 @@ uint64_t test_svcntd()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 0)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat()
+uint64_t test_svcntd_pat(void) MODE_ATTR
 {
   return svcntd_pat(SV_POW2);
 }
@@ -45,7 +53,7 @@ uint64_t test_svcntd_pat()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 1
 //
-uint64_t test_svcntd_pat_1()
+uint64_t test_svcntd_pat_1(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL1);
 }
@@ -58,7 +66,7 @@ uint64_t test_svcntd_pat_1()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 2
 //
-uint64_t test_svcntd_pat_2()
+uint64_t test_svcntd_pat_2(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL2);
 }
@@ -73,7 +81,7 @@ uint64_t test_svcntd_pat_2()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 3)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_3()
+uint64_t test_svcntd_pat_3(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL3);
 }
@@ -88,7 +96,7 @@ uint64_t test_svcntd_pat_3()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 4)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_4()
+uint64_t test_svcntd_pat_4(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL4);
 }
@@ -103,7 +111,7 @@ uint64_t test_svcntd_pat_4()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 5)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_5()
+uint64_t test_svcntd_pat_5(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL5);
 }
@@ -118,7 +126,7 @@ uint64_t test_svcntd_pat_5()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 6)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_6()
+uint64_t test_svcntd_pat_6(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL6);
 }
@@ -133,7 +141,7 @@ uint64_t test_svcntd_pat_6()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 7)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_7()
+uint64_t test_svcntd_pat_7(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL7);
 }
@@ -148,7 +156,7 @@ uint64_t test_svcntd_pat_7()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 8)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_8()
+uint64_t test_svcntd_pat_8(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL8);
 }
@@ -163,7 +171,7 @@ uint64_t test_svcntd_pat_8()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 9)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_9()
+uint64_t test_svcntd_pat_9(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL16);
 }
@@ -178,7 +186,7 @@ uint64_t test_svcntd_pat_9()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 10)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_10()
+uint64_t test_svcntd_pat_10(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL32);
 }
@@ -193,7 +201,7 @@ uint64_t test_svcntd_pat_10()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 11)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_11()
+uint64_t test_svcntd_pat_11(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL64);
 }
@@ -208,7 +216,7 @@ uint64_t test_svcntd_pat_11()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 12)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_12()
+uint64_t test_svcntd_pat_12(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL128);
 }
@@ -223,7 +231,7 @@ uint64_t test_svcntd_pat_12()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 13)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_13()
+uint64_t test_svcntd_pat_13(void) MODE_ATTR
 {
   return svcntd_pat(SV_VL256);
 }
@@ -238,7 +246,7 @@ uint64_t test_svcntd_pat_13()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 29)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_14()
+uint64_t test_svcntd_pat_14(void) MODE_ATTR
 {
   return svcntd_pat(SV_MUL4);
 }
@@ -253,7 +261,7 @@ uint64_t test_svcntd_pat_14()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntd(i32 30)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntd_pat_15()
+uint64_t test_svcntd_pat_15(void) MODE_ATTR
 {
   return svcntd_pat(SV_MUL3);
 }
@@ -270,7 +278,7 @@ uint64_t test_svcntd_pat_15()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svcntd_pat_16()
+uint64_t test_svcntd_pat_16(void) MODE_ATTR
 {
   return svcntd_pat(SV_ALL);
 }
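
For 64-bit elements the folding threshold moves: the minimum vector holds only two doubles, so just SV_VL1 and SV_VL2 become constants above, and SV_VL3 onward already lowers to @llvm.aarch64.sve.cntd with the pattern immediate. A hedged contrast, assuming the architectural rule that an unsatisfiable VLn pattern counts as zero:

#include <arm_sve.h>
#include <stdint.h>

uint64_t two_doubles(void) __arm_streaming {
  return svcntd_pat(SV_VL2);   // always 2: every vector holds two doubles
}
uint64_t three_doubles(void) __arm_streaming {
  return svcntd_pat(SV_VL3);   // 3 on implementations whose vectors hold at
                               // least three doubles, otherwise 0
}
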
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnth.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnth.c
index 82d374302096c..6d510f382ca74 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnth.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnth.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svcnth(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
@@ -17,7 +25,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svcnth()
+uint64_t test_svcnth(void) MODE_ATTR
 {
   return svcnth();
 }
@@ -32,7 +40,7 @@ uint64_t test_svcnth()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cnth(i32 0)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcnth_pat()
+uint64_t test_svcnth_pat(void) MODE_ATTR
 {
   return svcnth_pat(SV_POW2);
 }
@@ -45,7 +53,7 @@ uint64_t test_svcnth_pat()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 1
 //
-uint64_t test_svcnth_pat_1()
+uint64_t test_svcnth_pat_1(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL1);
 }
@@ -58,7 +66,7 @@ uint64_t test_svcnth_pat_1()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 2
 //
-uint64_t test_svcnth_pat_2()
+uint64_t test_svcnth_pat_2(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL2);
 }
@@ -71,7 +79,7 @@ uint64_t test_svcnth_pat_2()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 3
 //
-uint64_t test_svcnth_pat_3()
+uint64_t test_svcnth_pat_3(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL3);
 }
@@ -84,7 +92,7 @@ uint64_t test_svcnth_pat_3()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 4
 //
-uint64_t test_svcnth_pat_4()
+uint64_t test_svcnth_pat_4(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL4);
 }
@@ -97,7 +105,7 @@ uint64_t test_svcnth_pat_4()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 5
 //
-uint64_t test_svcnth_pat_5()
+uint64_t test_svcnth_pat_5(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL5);
 }
@@ -110,7 +118,7 @@ uint64_t test_svcnth_pat_5()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 6
 //
-uint64_t test_svcnth_pat_6()
+uint64_t test_svcnth_pat_6(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL6);
 }
@@ -123,7 +131,7 @@ uint64_t test_svcnth_pat_6()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 7
 //
-uint64_t test_svcnth_pat_7()
+uint64_t test_svcnth_pat_7(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL7);
 }
@@ -136,7 +144,7 @@ uint64_t test_svcnth_pat_7()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 8
 //
-uint64_t test_svcnth_pat_8()
+uint64_t test_svcnth_pat_8(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL8);
 }
@@ -151,7 +159,7 @@ uint64_t test_svcnth_pat_8()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cnth(i32 9)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcnth_pat_9()
+uint64_t test_svcnth_pat_9(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL16);
 }
@@ -166,7 +174,7 @@ uint64_t test_svcnth_pat_9()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cnth(i32 10)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcnth_pat_10()
+uint64_t test_svcnth_pat_10(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL32);
 }
@@ -181,7 +189,7 @@ uint64_t test_svcnth_pat_10()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cnth(i32 11)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcnth_pat_11()
+uint64_t test_svcnth_pat_11(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL64);
 }
@@ -196,7 +204,7 @@ uint64_t test_svcnth_pat_11()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cnth(i32 12)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcnth_pat_12()
+uint64_t test_svcnth_pat_12(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL128);
 }
@@ -211,7 +219,7 @@ uint64_t test_svcnth_pat_12()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cnth(i32 13)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcnth_pat_13()
+uint64_t test_svcnth_pat_13(void) MODE_ATTR
 {
   return svcnth_pat(SV_VL256);
 }
@@ -226,7 +234,7 @@ uint64_t test_svcnth_pat_13()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cnth(i32 29)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcnth_pat_14()
+uint64_t test_svcnth_pat_14(void) MODE_ATTR
 {
   return svcnth_pat(SV_MUL4);
 }
@@ -241,7 +249,7 @@ uint64_t test_svcnth_pat_14()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cnth(i32 30)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcnth_pat_15()
+uint64_t test_svcnth_pat_15(void) MODE_ATTR
 {
   return svcnth_pat(SV_MUL3);
 }
@@ -258,7 +266,7 @@ uint64_t test_svcnth_pat_15()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svcnth_pat_16()
+uint64_t test_svcnth_pat_16(void) MODE_ATTR
 {
   return svcnth_pat(SV_ALL);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c
index a5208d4dc5b31..056eb4a02a1c0 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntp.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svcntp_b8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
@@ -15,7 +23,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntp_b8(svbool_t pg, svbool_t op)
+uint64_t test_svcntp_b8(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svcntp_b8(pg, op);
 }
@@ -34,7 +42,7 @@ uint64_t test_svcntp_b8(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP2]]
 //
-uint64_t test_svcntp_b16(svbool_t pg, svbool_t op)
+uint64_t test_svcntp_b16(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svcntp_b16(pg, op);
 }
@@ -53,7 +61,7 @@ uint64_t test_svcntp_b16(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP2]]
 //
-uint64_t test_svcntp_b32(svbool_t pg, svbool_t op)
+uint64_t test_svcntp_b32(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svcntp_b32(pg, op);
 }
@@ -72,7 +80,7 @@ uint64_t test_svcntp_b32(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i1> [[TMP1]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP2]]
 //
-uint64_t test_svcntp_b64(svbool_t pg, svbool_t op)
+uint64_t test_svcntp_b64(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svcntp_b64(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntw.c
index a0a2931211b02..1a659ef81c0cc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cntw.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svcntw(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
@@ -17,7 +25,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svcntw()
+uint64_t test_svcntw(void) MODE_ATTR
 {
   return svcntw();
 }
@@ -32,7 +40,7 @@ uint64_t test_svcntw()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 0)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat()
+uint64_t test_svcntw_pat(void) MODE_ATTR
 {
   return svcntw_pat(SV_POW2);
 }
@@ -45,7 +53,7 @@ uint64_t test_svcntw_pat()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 1
 //
-uint64_t test_svcntw_pat_1()
+uint64_t test_svcntw_pat_1(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL1);
 }
@@ -58,7 +66,7 @@ uint64_t test_svcntw_pat_1()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 2
 //
-uint64_t test_svcntw_pat_2()
+uint64_t test_svcntw_pat_2(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL2);
 }
@@ -71,7 +79,7 @@ uint64_t test_svcntw_pat_2()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 3
 //
-uint64_t test_svcntw_pat_3()
+uint64_t test_svcntw_pat_3(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL3);
 }
@@ -84,7 +92,7 @@ uint64_t test_svcntw_pat_3()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret i64 4
 //
-uint64_t test_svcntw_pat_4()
+uint64_t test_svcntw_pat_4(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL4);
 }
@@ -99,7 +107,7 @@ uint64_t test_svcntw_pat_4()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 5)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_5()
+uint64_t test_svcntw_pat_5(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL5);
 }
@@ -114,7 +122,7 @@ uint64_t test_svcntw_pat_5()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 6)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_6()
+uint64_t test_svcntw_pat_6(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL6);
 }
@@ -129,7 +137,7 @@ uint64_t test_svcntw_pat_6()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 7)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_7()
+uint64_t test_svcntw_pat_7(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL7);
 }
@@ -144,7 +152,7 @@ uint64_t test_svcntw_pat_7()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 8)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_8()
+uint64_t test_svcntw_pat_8(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL8);
 }
@@ -159,7 +167,7 @@ uint64_t test_svcntw_pat_8()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 9)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_9()
+uint64_t test_svcntw_pat_9(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL16);
 }
@@ -174,7 +182,7 @@ uint64_t test_svcntw_pat_9()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 10)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_10()
+uint64_t test_svcntw_pat_10(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL32);
 }
@@ -189,7 +197,7 @@ uint64_t test_svcntw_pat_10()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 11)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_11()
+uint64_t test_svcntw_pat_11(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL64);
 }
@@ -204,7 +212,7 @@ uint64_t test_svcntw_pat_11()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 12)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_12()
+uint64_t test_svcntw_pat_12(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL128);
 }
@@ -219,7 +227,7 @@ uint64_t test_svcntw_pat_12()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 13)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_13()
+uint64_t test_svcntw_pat_13(void) MODE_ATTR
 {
   return svcntw_pat(SV_VL256);
 }
@@ -234,7 +242,7 @@ uint64_t test_svcntw_pat_13()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 29)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_14()
+uint64_t test_svcntw_pat_14(void) MODE_ATTR
 {
   return svcntw_pat(SV_MUL4);
 }
@@ -249,7 +257,7 @@ uint64_t test_svcntw_pat_14()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.cntw(i32 30)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svcntw_pat_15()
+uint64_t test_svcntw_pat_15(void) MODE_ATTR
 {
   return svcntw_pat(SV_MUL3);
 }
@@ -266,7 +274,7 @@ uint64_t test_svcntw_pat_15()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svcntw_pat_16()
+uint64_t test_svcntw_pat_16(void) MODE_ATTR
 {
   return svcntw_pat(SV_ALL);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
index d12fc41af4756..afff231b7740b 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c
index e21557e9f4aa5..2338f80fd0896 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create2.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
index 1bf7389dc4539..86533e58f5617 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c
index 7310336b71551..aeff07104c189 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create3.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
index 40c511536873a..3067ae4875719 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c
index 1008534070d04..b2781e97f7ec2 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_create4.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c
index 12b804b37ebc5..145d60db6eda3 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -27,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) {
+svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svcvt_bf16, _f32, _x, )(pg, op);
 }
 
@@ -43,7 +50,7 @@ svbfloat16_t test_svcvt_bf16_f32_x(svbool_t pg, svfloat32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) {
+svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svcvt_bf16, _f32, _z, )(pg, op);
 }
 
@@ -59,6 +66,6 @@ svbfloat16_t test_svcvt_bf16_f32_z(svbool_t pg, svfloat32_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svcvt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) {
+svbfloat16_t test_svcvt_bf16_f32_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svcvt_bf16, _f32, _m, )(inactive, pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt.c
index aa2de6412e6e3..9dfbf08a29e67 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvt.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svcvt_s16_f16_z(svbool_t pg, svfloat16_t op)
+svint16_t test_svcvt_s16_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s16,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svint16_t test_svcvt_s16_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svcvt_s16_f16_m(svint16_t inactive, svbool_t pg, svfloat16_t op)
+svint16_t test_svcvt_s16_f16_m(svint16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s16,_f16,_m,)(inactive, pg, op);
 }
@@ -60,7 +68,7 @@ svint16_t test_svcvt_s16_f16_m(svint16_t inactive, svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzs.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svcvt_s16_f16_x(svbool_t pg, svfloat16_t op)
+svint16_t test_svcvt_s16_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s16,_f16,_x,)(pg, op);
 }
@@ -77,7 +85,7 @@ svint16_t test_svcvt_s16_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcvt_u16_f16_z(svbool_t pg, svfloat16_t op)
+svuint16_t test_svcvt_u16_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u16,_f16,_z,)(pg, op);
 }
@@ -94,7 +102,7 @@ svuint16_t test_svcvt_u16_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcvt_u16_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op)
+svuint16_t test_svcvt_u16_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u16,_f16,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svuint16_t test_svcvt_u16_f16_m(svuint16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzu.nxv8i16.nxv8f16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svcvt_u16_f16_x(svbool_t pg, svfloat16_t op)
+svuint16_t test_svcvt_u16_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u16,_f16,_x,)(pg, op);
 }
@@ -128,7 +136,7 @@ svuint16_t test_svcvt_u16_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f16_z(svbool_t pg, svfloat16_t op)
+svint32_t test_svcvt_s32_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f16,_z,)(pg, op);
 }
@@ -145,7 +153,7 @@ svint32_t test_svcvt_s32_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f32_z(svbool_t pg, svfloat32_t op)
+svint32_t test_svcvt_s32_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f32,_z,)(pg, op);
 }
@@ -162,7 +170,7 @@ svint32_t test_svcvt_s32_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f64_z(svbool_t pg, svfloat64_t op)
+svint32_t test_svcvt_s32_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f64,_z,)(pg, op);
 }
@@ -179,7 +187,7 @@ svint32_t test_svcvt_s32_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f16_m(svint32_t inactive, svbool_t pg, svfloat16_t op)
+svint32_t test_svcvt_s32_f16_m(svint32_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f16,_m,)(inactive, pg, op);
 }
@@ -196,7 +204,7 @@ svint32_t test_svcvt_s32_f16_m(svint32_t inactive, svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f32_m(svint32_t inactive, svbool_t pg, svfloat32_t op)
+svint32_t test_svcvt_s32_f32_m(svint32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f32,_m,)(inactive, pg, op);
 }
@@ -213,7 +221,7 @@ svint32_t test_svcvt_s32_f32_m(svint32_t inactive, svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f64_m(svint32_t inactive, svbool_t pg, svfloat64_t op)
+svint32_t test_svcvt_s32_f64_m(svint32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f64,_m,)(inactive, pg, op);
 }
@@ -230,7 +238,7 @@ svint32_t test_svcvt_s32_f64_m(svint32_t inactive, svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f16_x(svbool_t pg, svfloat16_t op)
+svint32_t test_svcvt_s32_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f16,_x,)(pg, op);
 }
@@ -247,7 +255,7 @@ svint32_t test_svcvt_s32_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f32_x(svbool_t pg, svfloat32_t op)
+svint32_t test_svcvt_s32_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f32,_x,)(pg, op);
 }
@@ -264,7 +272,7 @@ svint32_t test_svcvt_s32_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzs.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svcvt_s32_f64_x(svbool_t pg, svfloat64_t op)
+svint32_t test_svcvt_s32_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s32,_f64,_x,)(pg, op);
 }
@@ -281,7 +289,7 @@ svint32_t test_svcvt_s32_f64_x(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f16_z(svbool_t pg, svfloat16_t op)
+svint64_t test_svcvt_s64_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f16,_z,)(pg, op);
 }
@@ -298,7 +306,7 @@ svint64_t test_svcvt_s64_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f32_z(svbool_t pg, svfloat32_t op)
+svint64_t test_svcvt_s64_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f32,_z,)(pg, op);
 }
@@ -315,7 +323,7 @@ svint64_t test_svcvt_s64_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f64_z(svbool_t pg, svfloat64_t op)
+svint64_t test_svcvt_s64_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f64,_z,)(pg, op);
 }
@@ -332,7 +340,7 @@ svint64_t test_svcvt_s64_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f16_m(svint64_t inactive, svbool_t pg, svfloat16_t op)
+svint64_t test_svcvt_s64_f16_m(svint64_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f16,_m,)(inactive, pg, op);
 }
@@ -349,7 +357,7 @@ svint64_t test_svcvt_s64_f16_m(svint64_t inactive, svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f32_m(svint64_t inactive, svbool_t pg, svfloat32_t op)
+svint64_t test_svcvt_s64_f32_m(svint64_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f32,_m,)(inactive, pg, op);
 }
@@ -366,7 +374,7 @@ svint64_t test_svcvt_s64_f32_m(svint64_t inactive, svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f64_m(svint64_t inactive, svbool_t pg, svfloat64_t op)
+svint64_t test_svcvt_s64_f64_m(svint64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f64,_m,)(inactive, pg, op);
 }
@@ -383,7 +391,7 @@ svint64_t test_svcvt_s64_f64_m(svint64_t inactive, svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f16_x(svbool_t pg, svfloat16_t op)
+svint64_t test_svcvt_s64_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f16,_x,)(pg, op);
 }
@@ -400,7 +408,7 @@ svint64_t test_svcvt_s64_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f32_x(svbool_t pg, svfloat32_t op)
+svint64_t test_svcvt_s64_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f32,_x,)(pg, op);
 }
@@ -417,7 +425,7 @@ svint64_t test_svcvt_s64_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzs.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svcvt_s64_f64_x(svbool_t pg, svfloat64_t op)
+svint64_t test_svcvt_s64_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_s64,_f64,_x,)(pg, op);
 }
@@ -434,7 +442,7 @@ svint64_t test_svcvt_s64_f64_x(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f16_z(svbool_t pg, svfloat16_t op)
+svuint32_t test_svcvt_u32_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f16,_z,)(pg, op);
 }
@@ -451,7 +459,7 @@ svuint32_t test_svcvt_u32_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f32_z(svbool_t pg, svfloat32_t op)
+svuint32_t test_svcvt_u32_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f32,_z,)(pg, op);
 }
@@ -468,7 +476,7 @@ svuint32_t test_svcvt_u32_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f64_z(svbool_t pg, svfloat64_t op)
+svuint32_t test_svcvt_u32_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f64,_z,)(pg, op);
 }
@@ -485,7 +493,7 @@ svuint32_t test_svcvt_u32_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f16_m(svuint32_t inactive, svbool_t pg, svfloat16_t op)
+svuint32_t test_svcvt_u32_f16_m(svuint32_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f16,_m,)(inactive, pg, op);
 }
@@ -502,7 +510,7 @@ svuint32_t test_svcvt_u32_f16_m(svuint32_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op)
+svuint32_t test_svcvt_u32_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f32,_m,)(inactive, pg, op);
 }
@@ -519,7 +527,7 @@ svuint32_t test_svcvt_u32_f32_m(svuint32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f64_m(svuint32_t inactive, svbool_t pg, svfloat64_t op)
+svuint32_t test_svcvt_u32_f64_m(svuint32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f64,_m,)(inactive, pg, op);
 }
@@ -536,7 +544,7 @@ svuint32_t test_svcvt_u32_f64_m(svuint32_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f16(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f16_x(svbool_t pg, svfloat16_t op)
+svuint32_t test_svcvt_u32_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f16,_x,)(pg, op);
 }
@@ -553,7 +561,7 @@ svuint32_t test_svcvt_u32_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.nxv4i32.nxv4f32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f32_x(svbool_t pg, svfloat32_t op)
+svuint32_t test_svcvt_u32_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f32,_x,)(pg, op);
 }
@@ -570,7 +578,7 @@ svuint32_t test_svcvt_u32_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzu.i32f64(<vscale x 4 x i32> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svcvt_u32_f64_x(svbool_t pg, svfloat64_t op)
+svuint32_t test_svcvt_u32_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u32,_f64,_x,)(pg, op);
 }
@@ -587,7 +595,7 @@ svuint32_t test_svcvt_u32_f64_x(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f16_z(svbool_t pg, svfloat16_t op)
+svuint64_t test_svcvt_u64_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f16,_z,)(pg, op);
 }
@@ -604,7 +612,7 @@ svuint64_t test_svcvt_u64_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f32_z(svbool_t pg, svfloat32_t op)
+svuint64_t test_svcvt_u64_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f32,_z,)(pg, op);
 }
@@ -621,7 +629,7 @@ svuint64_t test_svcvt_u64_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f64_z(svbool_t pg, svfloat64_t op)
+svuint64_t test_svcvt_u64_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f64,_z,)(pg, op);
 }
@@ -638,7 +646,7 @@ svuint64_t test_svcvt_u64_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f16_m(svuint64_t inactive, svbool_t pg, svfloat16_t op)
+svuint64_t test_svcvt_u64_f16_m(svuint64_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f16,_m,)(inactive, pg, op);
 }
@@ -655,7 +663,7 @@ svuint64_t test_svcvt_u64_f16_m(svuint64_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f32_m(svuint64_t inactive, svbool_t pg, svfloat32_t op)
+svuint64_t test_svcvt_u64_f32_m(svuint64_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f32,_m,)(inactive, pg, op);
 }
@@ -672,7 +680,7 @@ svuint64_t test_svcvt_u64_f32_m(svuint64_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op)
+svuint64_t test_svcvt_u64_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f64,_m,)(inactive, pg, op);
 }
@@ -689,7 +697,7 @@ svuint64_t test_svcvt_u64_f64_m(svuint64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f16(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f16_x(svbool_t pg, svfloat16_t op)
+svuint64_t test_svcvt_u64_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f16,_x,)(pg, op);
 }
@@ -706,7 +714,7 @@ svuint64_t test_svcvt_u64_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.i64f32(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f32_x(svbool_t pg, svfloat32_t op)
+svuint64_t test_svcvt_u64_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f32,_x,)(pg, op);
 }
@@ -723,7 +731,7 @@ svuint64_t test_svcvt_u64_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.fcvtzu.nxv2i64.nxv2f64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svcvt_u64_f64_x(svbool_t pg, svfloat64_t op)
+svuint64_t test_svcvt_u64_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_u64,_f64,_x,)(pg, op);
 }
@@ -740,7 +748,7 @@ svuint64_t test_svcvt_u64_f64_x(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_s32_z(svbool_t pg, svint32_t op)
+svfloat16_t test_svcvt_f16_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_s32,_z,)(pg, op);
 }
@@ -757,7 +765,7 @@ svfloat16_t test_svcvt_f16_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_s32_z(svbool_t pg, svint32_t op)
+svfloat32_t test_svcvt_f32_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_s32,_z,)(pg, op);
 }
@@ -774,7 +782,7 @@ svfloat32_t test_svcvt_f32_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_s32_z(svbool_t pg, svint32_t op)
+svfloat64_t test_svcvt_f64_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_s32,_z,)(pg, op);
 }
@@ -791,7 +799,7 @@ svfloat64_t test_svcvt_f64_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_s32_m(svfloat16_t inactive, svbool_t pg, svint32_t op)
+svfloat16_t test_svcvt_f16_s32_m(svfloat16_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_s32,_m,)(inactive, pg, op);
 }
@@ -808,7 +816,7 @@ svfloat16_t test_svcvt_f16_s32_m(svfloat16_t inactive, svbool_t pg, svint32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_s32_m(svfloat32_t inactive, svbool_t pg, svint32_t op)
+svfloat32_t test_svcvt_f32_s32_m(svfloat32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_s32,_m,)(inactive, pg, op);
 }
@@ -825,7 +833,7 @@ svfloat32_t test_svcvt_f32_s32_m(svfloat32_t inactive, svbool_t pg, svint32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_s32_m(svfloat64_t inactive, svbool_t pg, svint32_t op)
+svfloat64_t test_svcvt_f64_s32_m(svfloat64_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_s32,_m,)(inactive, pg, op);
 }
@@ -842,7 +850,7 @@ svfloat64_t test_svcvt_f64_s32_m(svfloat64_t inactive, svbool_t pg, svint32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_s32_x(svbool_t pg, svint32_t op)
+svfloat16_t test_svcvt_f16_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_s32,_x,)(pg, op);
 }
@@ -859,7 +867,7 @@ svfloat16_t test_svcvt_f16_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_s32_x(svbool_t pg, svint32_t op)
+svfloat32_t test_svcvt_f32_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_s32,_x,)(pg, op);
 }
@@ -876,7 +884,7 @@ svfloat32_t test_svcvt_f32_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_s32_x(svbool_t pg, svint32_t op)
+svfloat64_t test_svcvt_f64_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_s32,_x,)(pg, op);
 }
@@ -893,7 +901,7 @@ svfloat64_t test_svcvt_f64_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_s64_z(svbool_t pg, svint64_t op)
+svfloat16_t test_svcvt_f16_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_s64,_z,)(pg, op);
 }
@@ -910,7 +918,7 @@ svfloat16_t test_svcvt_f16_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_s64_z(svbool_t pg, svint64_t op)
+svfloat32_t test_svcvt_f32_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_s64,_z,)(pg, op);
 }
@@ -927,7 +935,7 @@ svfloat32_t test_svcvt_f32_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_s64_z(svbool_t pg, svint64_t op)
+svfloat64_t test_svcvt_f64_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_s64,_z,)(pg, op);
 }
@@ -944,7 +952,7 @@ svfloat64_t test_svcvt_f64_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_s64_m(svfloat16_t inactive, svbool_t pg, svint64_t op)
+svfloat16_t test_svcvt_f16_s64_m(svfloat16_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_s64,_m,)(inactive, pg, op);
 }
@@ -961,7 +969,7 @@ svfloat16_t test_svcvt_f16_s64_m(svfloat16_t inactive, svbool_t pg, svint64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_s64_m(svfloat32_t inactive, svbool_t pg, svint64_t op)
+svfloat32_t test_svcvt_f32_s64_m(svfloat32_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_s64,_m,)(inactive, pg, op);
 }
@@ -978,7 +986,7 @@ svfloat32_t test_svcvt_f32_s64_m(svfloat32_t inactive, svbool_t pg, svint64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_s64_m(svfloat64_t inactive, svbool_t pg, svint64_t op)
+svfloat64_t test_svcvt_f64_s64_m(svfloat64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_s64,_m,)(inactive, pg, op);
 }
@@ -995,7 +1003,7 @@ svfloat64_t test_svcvt_f64_s64_m(svfloat64_t inactive, svbool_t pg, svint64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_s64_x(svbool_t pg, svint64_t op)
+svfloat16_t test_svcvt_f16_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_s64,_x,)(pg, op);
 }
@@ -1012,7 +1020,7 @@ svfloat16_t test_svcvt_f16_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_s64_x(svbool_t pg, svint64_t op)
+svfloat32_t test_svcvt_f32_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_s64,_x,)(pg, op);
 }
@@ -1029,7 +1037,7 @@ svfloat32_t test_svcvt_f32_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_s64_x(svbool_t pg, svint64_t op)
+svfloat64_t test_svcvt_f64_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_s64,_x,)(pg, op);
 }
@@ -1046,7 +1054,7 @@ svfloat64_t test_svcvt_f64_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_u32_z(svbool_t pg, svuint32_t op)
+svfloat16_t test_svcvt_f16_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_u32,_z,)(pg, op);
 }
@@ -1063,7 +1071,7 @@ svfloat16_t test_svcvt_f16_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_u32_z(svbool_t pg, svuint32_t op)
+svfloat32_t test_svcvt_f32_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_u32,_z,)(pg, op);
 }
@@ -1080,7 +1088,7 @@ svfloat32_t test_svcvt_f32_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_u32_z(svbool_t pg, svuint32_t op)
+svfloat64_t test_svcvt_f64_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_u32,_z,)(pg, op);
 }
@@ -1097,7 +1105,7 @@ svfloat64_t test_svcvt_f64_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_u32_m(svfloat16_t inactive, svbool_t pg, svuint32_t op)
+svfloat16_t test_svcvt_f16_u32_m(svfloat16_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_u32,_m,)(inactive, pg, op);
 }
@@ -1114,7 +1122,7 @@ svfloat16_t test_svcvt_f16_u32_m(svfloat16_t inactive, svbool_t pg, svuint32_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_u32_m(svfloat32_t inactive, svbool_t pg, svuint32_t op)
+svfloat32_t test_svcvt_f32_u32_m(svfloat32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_u32,_m,)(inactive, pg, op);
 }
@@ -1131,7 +1139,7 @@ svfloat32_t test_svcvt_f32_u32_m(svfloat32_t inactive, svbool_t pg, svuint32_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_u32_m(svfloat64_t inactive, svbool_t pg, svuint32_t op)
+svfloat64_t test_svcvt_f64_u32_m(svfloat64_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_u32,_m,)(inactive, pg, op);
 }
@@ -1148,7 +1156,7 @@ svfloat64_t test_svcvt_f64_u32_m(svfloat64_t inactive, svbool_t pg, svuint32_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_u32_x(svbool_t pg, svuint32_t op)
+svfloat16_t test_svcvt_f16_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_u32,_x,)(pg, op);
 }
@@ -1165,7 +1173,7 @@ svfloat16_t test_svcvt_f16_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.nxv4f32.nxv4i32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_u32_x(svbool_t pg, svuint32_t op)
+svfloat32_t test_svcvt_f32_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_u32,_x,)(pg, op);
 }
@@ -1182,7 +1190,7 @@ svfloat32_t test_svcvt_f32_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.f64i32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_u32_x(svbool_t pg, svuint32_t op)
+svfloat64_t test_svcvt_f64_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_u32,_x,)(pg, op);
 }
@@ -1199,7 +1207,7 @@ svfloat64_t test_svcvt_f64_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_u64_z(svbool_t pg, svuint64_t op)
+svfloat16_t test_svcvt_f16_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_u64,_z,)(pg, op);
 }
@@ -1216,7 +1224,7 @@ svfloat16_t test_svcvt_f16_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_u64_z(svbool_t pg, svuint64_t op)
+svfloat32_t test_svcvt_f32_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_u64,_z,)(pg, op);
 }
@@ -1233,7 +1241,7 @@ svfloat32_t test_svcvt_f32_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_u64_z(svbool_t pg, svuint64_t op)
+svfloat64_t test_svcvt_f64_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_u64,_z,)(pg, op);
 }
@@ -1250,7 +1258,7 @@ svfloat64_t test_svcvt_f64_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_u64_m(svfloat16_t inactive, svbool_t pg, svuint64_t op)
+svfloat16_t test_svcvt_f16_u64_m(svfloat16_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_u64,_m,)(inactive, pg, op);
 }
@@ -1267,7 +1275,7 @@ svfloat16_t test_svcvt_f16_u64_m(svfloat16_t inactive, svbool_t pg, svuint64_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_u64_m(svfloat32_t inactive, svbool_t pg, svuint64_t op)
+svfloat32_t test_svcvt_f32_u64_m(svfloat32_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_u64,_m,)(inactive, pg, op);
 }
@@ -1284,7 +1292,7 @@ svfloat32_t test_svcvt_f32_u64_m(svfloat32_t inactive, svbool_t pg, svuint64_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_u64_m(svfloat64_t inactive, svbool_t pg, svuint64_t op)
+svfloat64_t test_svcvt_f64_u64_m(svfloat64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_u64,_m,)(inactive, pg, op);
 }
@@ -1301,7 +1309,7 @@ svfloat64_t test_svcvt_f64_u64_m(svfloat64_t inactive, svbool_t pg, svuint64_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtf.f16i64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_u64_x(svbool_t pg, svuint64_t op)
+svfloat16_t test_svcvt_f16_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_u64,_x,)(pg, op);
 }
@@ -1318,7 +1326,7 @@ svfloat16_t test_svcvt_f16_u64_x(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtf.f32i64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_u64_x(svbool_t pg, svuint64_t op)
+svfloat32_t test_svcvt_f32_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_u64,_x,)(pg, op);
 }
@@ -1335,7 +1343,7 @@ svfloat32_t test_svcvt_f32_u64_x(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtf.nxv2f64.nxv2i64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_u64_x(svbool_t pg, svuint64_t op)
+svfloat64_t test_svcvt_f64_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_u64,_x,)(pg, op);
 }
@@ -1352,7 +1360,7 @@ svfloat64_t test_svcvt_f64_u64_x(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_f16_z(svbool_t pg, svfloat16_t op)
+svfloat32_t test_svcvt_f32_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_f16,_z,)(pg, op);
 }
@@ -1369,7 +1377,7 @@ svfloat32_t test_svcvt_f32_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_f16_z(svbool_t pg, svfloat16_t op)
+svfloat64_t test_svcvt_f64_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_f16,_z,)(pg, op);
 }
@@ -1386,7 +1394,7 @@ svfloat64_t test_svcvt_f64_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_f16_m(svfloat32_t inactive, svbool_t pg, svfloat16_t op)
+svfloat32_t test_svcvt_f32_f16_m(svfloat32_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_f16,_m,)(inactive, pg, op);
 }
@@ -1403,7 +1411,7 @@ svfloat32_t test_svcvt_f32_f16_m(svfloat32_t inactive, svbool_t pg, svfloat16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_f16_m(svfloat64_t inactive, svbool_t pg, svfloat16_t op)
+svfloat64_t test_svcvt_f64_f16_m(svfloat64_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_f16,_m,)(inactive, pg, op);
 }
@@ -1420,7 +1428,7 @@ svfloat64_t test_svcvt_f64_f16_m(svfloat64_t inactive, svbool_t pg, svfloat16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_f16_x(svbool_t pg, svfloat16_t op)
+svfloat32_t test_svcvt_f32_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_f16,_x,)(pg, op);
 }
@@ -1437,7 +1445,7 @@ svfloat32_t test_svcvt_f32_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f16(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_f16_x(svbool_t pg, svfloat16_t op)
+svfloat64_t test_svcvt_f64_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_f16,_x,)(pg, op);
 }
@@ -1454,7 +1462,7 @@ svfloat64_t test_svcvt_f64_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_f32_z(svbool_t pg, svfloat32_t op)
+svfloat64_t test_svcvt_f64_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_f32,_z,)(pg, op);
 }
@@ -1471,7 +1479,7 @@ svfloat64_t test_svcvt_f64_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_f32_m(svfloat64_t inactive, svbool_t pg, svfloat32_t op)
+svfloat64_t test_svcvt_f64_f32_m(svfloat64_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_f32,_m,)(inactive, pg, op);
 }
@@ -1488,7 +1496,7 @@ svfloat64_t test_svcvt_f64_f32_m(svfloat64_t inactive, svbool_t pg, svfloat32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fcvt.f64f32(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svcvt_f64_f32_x(svbool_t pg, svfloat32_t op)
+svfloat64_t test_svcvt_f64_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f64,_f32,_x,)(pg, op);
 }
@@ -1505,7 +1513,7 @@ svfloat64_t test_svcvt_f64_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_f32_z(svbool_t pg, svfloat32_t op)
+svfloat16_t test_svcvt_f16_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_f32,_z,)(pg, op);
 }
@@ -1522,7 +1530,7 @@ svfloat16_t test_svcvt_f16_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_f64_z(svbool_t pg, svfloat64_t op)
+svfloat16_t test_svcvt_f16_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_f64,_z,)(pg, op);
 }
@@ -1539,7 +1547,7 @@ svfloat16_t test_svcvt_f16_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_f32_m(svfloat16_t inactive, svbool_t pg, svfloat32_t op)
+svfloat16_t test_svcvt_f16_f32_m(svfloat16_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_f32,_m,)(inactive, pg, op);
 }
@@ -1556,7 +1564,7 @@ svfloat16_t test_svcvt_f16_f32_m(svfloat16_t inactive, svbool_t pg, svfloat32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_f64_m(svfloat16_t inactive, svbool_t pg, svfloat64_t op)
+svfloat16_t test_svcvt_f16_f64_m(svfloat16_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_f64,_m,)(inactive, pg, op);
 }
@@ -1573,7 +1581,7 @@ svfloat16_t test_svcvt_f16_f64_m(svfloat16_t inactive, svbool_t pg, svfloat64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f32(<vscale x 8 x half> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_f32_x(svbool_t pg, svfloat32_t op)
+svfloat16_t test_svcvt_f16_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_f32,_x,)(pg, op);
 }
@@ -1590,7 +1598,7 @@ svfloat16_t test_svcvt_f16_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fcvt.f16f64(<vscale x 8 x half> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svcvt_f16_f64_x(svbool_t pg, svfloat64_t op)
+svfloat16_t test_svcvt_f16_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f16,_f64,_x,)(pg, op);
 }
@@ -1607,7 +1615,7 @@ svfloat16_t test_svcvt_f16_f64_x(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_f64_z(svbool_t pg, svfloat64_t op)
+svfloat32_t test_svcvt_f32_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_f64,_z,)(pg, op);
 }
@@ -1624,7 +1632,7 @@ svfloat32_t test_svcvt_f32_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_f64_m(svfloat32_t inactive, svbool_t pg, svfloat64_t op)
+svfloat32_t test_svcvt_f32_f64_m(svfloat32_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_f64,_m,)(inactive, pg, op);
 }
@@ -1641,7 +1649,7 @@ svfloat32_t test_svcvt_f32_f64_m(svfloat32_t inactive, svbool_t pg, svfloat64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svcvt_f32_f64_x(svbool_t pg, svfloat64_t op)
+svfloat32_t test_svcvt_f32_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svcvt_f32,_f64,_x,)(pg, op);
 }
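
A note on the _z/_m/_x suffixes that repeat throughout these checks: the three forms differ only in what the inactive (predicated-off) lanes hold, which is exactly what the autogenerated IR shows -- zeroinitializer for _z, the explicit 'inactive' argument for _m, and undef for _x. A minimal illustration using intrinsics from this file (the widen_* helper names are invented for the example, not part of the patch):

  #include <arm_sve.h>

  /* _z: inactive lanes are zeroed. */
  svfloat32_t widen_z(svbool_t pg, svfloat16_t op) {
    return svcvt_f32_f16_z(pg, op);
  }
  /* _m: inactive lanes are taken from 'inactive'. */
  svfloat32_t widen_m(svfloat32_t inactive, svbool_t pg, svfloat16_t op) {
    return svcvt_f32_f16_m(inactive, pg, op);
  }
  /* _x: inactive lanes are unspecified (undef in the IR). */
  svfloat32_t widen_x(svbool_t pg, svfloat16_t op) {
    return svcvt_f32_f16_x(pg, op);
  }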
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
index 8772917715a7d..ce719f92674c0 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cvtnt.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -27,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) {
+svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _x, )(even, pg, op);
 }
 
@@ -43,6 +50,6 @@ svbfloat16_t test_svcvtnt_bf16_f32_x(svbfloat16_t even, svbool_t pg, svfloat32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> [[EVEN:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) {
+svbfloat16_t test_svcvtnt_bf16_f32_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) MODE_ATTR {
   return SVE_ACLE_FUNC(svcvtnt_bf16, _f32, _m, )(even, pg, op);
 }
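
The MODE_ATTR block added above is what lets a single test source serve both configurations: compiling with -target-feature +sme defines __ARM_FEATURE_SME, so every test function gains __arm_streaming and the streaming-compatible intrinsics are exercised in streaming mode, while the +sve runs see an empty attribute and compile exactly as before. A standalone sketch of the same mechanism (scale() and its body are illustrative, assuming svmul_n_f32_x is among the intrinsics marked streaming-compatible by this series):

  #include <arm_sve.h>

  #if defined __ARM_FEATURE_SME
  #define MODE_ATTR __arm_streaming  /* built with +sme: streaming function */
  #else
  #define MODE_ATTR                  /* built with +sve: ordinary function */
  #endif

  /* One definition, valid in either mode. */
  svfloat32_t scale(svbool_t pg, svfloat32_t v, float s) MODE_ATTR
  {
    return svmul_n_f32_x(pg, v, s);
  }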
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c
index 441ce1b718543..8755532a43ddb 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_div.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svdiv_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svdiv_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_s32,_z,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svint32_t test_svdiv_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svdiv_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svdiv_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_s64,_z,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svint64_t test_svdiv_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svdiv_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svdiv_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_u32,_z,)(pg, op1, op2);
 }
@@ -85,7 +93,7 @@ svuint32_t test_svdiv_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svdiv_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svdiv_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_u64,_z,)(pg, op1, op2);
 }
@@ -102,7 +110,7 @@ svuint64_t test_svdiv_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdiv_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svdiv_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_s32,_m,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svint32_t test_svdiv_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdiv_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svdiv_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_s64,_m,)(pg, op1, op2);
 }
@@ -136,7 +144,7 @@ svint64_t test_svdiv_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdiv_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svdiv_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_u32,_m,)(pg, op1, op2);
 }
@@ -153,7 +161,7 @@ svuint32_t test_svdiv_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdiv_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svdiv_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_u64,_m,)(pg, op1, op2);
 }
@@ -170,7 +178,7 @@ svuint64_t test_svdiv_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdiv_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svdiv_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_s32,_x,)(pg, op1, op2);
 }
@@ -187,7 +195,7 @@ svint32_t test_svdiv_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdiv_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svdiv_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_s64,_x,)(pg, op1, op2);
 }
@@ -204,7 +212,7 @@ svint64_t test_svdiv_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdiv_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svdiv_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_u32,_x,)(pg, op1, op2);
 }
@@ -221,7 +229,7 @@ svuint32_t test_svdiv_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdiv_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svdiv_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_u64,_x,)(pg, op1, op2);
 }
@@ -244,7 +252,7 @@ svuint64_t test_svdiv_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svdiv_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svdiv_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_s32,_z,)(pg, op1, op2);
 }
@@ -267,7 +275,7 @@ svint32_t test_svdiv_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svdiv_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svdiv_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_s64,_z,)(pg, op1, op2);
 }
@@ -290,7 +298,7 @@ svint64_t test_svdiv_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svdiv_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svdiv_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_u32,_z,)(pg, op1, op2);
 }
@@ -313,7 +321,7 @@ svuint32_t test_svdiv_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svdiv_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svdiv_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_u64,_z,)(pg, op1, op2);
 }
@@ -334,7 +342,7 @@ svuint64_t test_svdiv_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdiv_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svdiv_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_s32,_m,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svdiv_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdiv_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svdiv_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_s64,_m,)(pg, op1, op2);
 }
@@ -376,7 +384,7 @@ svint64_t test_svdiv_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdiv_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svdiv_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_u32,_m,)(pg, op1, op2);
 }
@@ -397,7 +405,7 @@ svuint32_t test_svdiv_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdiv_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svdiv_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_u64,_m,)(pg, op1, op2);
 }
@@ -418,7 +426,7 @@ svuint64_t test_svdiv_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdiv_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svdiv_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_s32,_x,)(pg, op1, op2);
 }
@@ -439,7 +447,7 @@ svint32_t test_svdiv_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdiv_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svdiv_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_s64,_x,)(pg, op1, op2);
 }
@@ -460,7 +468,7 @@ svint64_t test_svdiv_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdiv_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svdiv_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_u32,_x,)(pg, op1, op2);
 }
@@ -481,7 +489,7 @@ svuint32_t test_svdiv_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdiv_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svdiv_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_u64,_x,)(pg, op1, op2);
 }
@@ -500,7 +508,7 @@ svuint64_t test_svdiv_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svdiv_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svdiv_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f16,_z,)(pg, op1, op2);
 }
@@ -519,7 +527,7 @@ svfloat16_t test_svdiv_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svdiv_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svdiv_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f32,_z,)(pg, op1, op2);
 }
@@ -538,7 +546,7 @@ svfloat32_t test_svdiv_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svdiv_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svdiv_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f64,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svfloat64_t test_svdiv_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdiv_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svdiv_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f16,_m,)(pg, op1, op2);
 }
@@ -572,7 +580,7 @@ svfloat16_t test_svdiv_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdiv_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svdiv_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f32,_m,)(pg, op1, op2);
 }
@@ -589,7 +597,7 @@ svfloat32_t test_svdiv_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdiv_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svdiv_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f64,_m,)(pg, op1, op2);
 }
@@ -606,7 +614,7 @@ svfloat64_t test_svdiv_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdiv_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svdiv_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f16,_x,)(pg, op1, op2);
 }
@@ -623,7 +631,7 @@ svfloat16_t test_svdiv_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdiv_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svdiv_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f32,_x,)(pg, op1, op2);
 }
@@ -640,7 +648,7 @@ svfloat32_t test_svdiv_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdiv_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svdiv_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_f64,_x,)(pg, op1, op2);
 }
@@ -663,7 +671,7 @@ svfloat64_t test_svdiv_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svdiv_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svdiv_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f16,_z,)(pg, op1, op2);
 }
@@ -686,7 +694,7 @@ svfloat16_t test_svdiv_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svdiv_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svdiv_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f32,_z,)(pg, op1, op2);
 }
@@ -709,7 +717,7 @@ svfloat32_t test_svdiv_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svdiv_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svdiv_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f64,_z,)(pg, op1, op2);
 }
@@ -730,7 +738,7 @@ svfloat64_t test_svdiv_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdiv_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svdiv_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f16,_m,)(pg, op1, op2);
 }
@@ -751,7 +759,7 @@ svfloat16_t test_svdiv_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdiv_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svdiv_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f32,_m,)(pg, op1, op2);
 }
@@ -772,7 +780,7 @@ svfloat32_t test_svdiv_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdiv_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svdiv_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f64,_m,)(pg, op1, op2);
 }
@@ -793,7 +801,7 @@ svfloat64_t test_svdiv_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdiv_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svdiv_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f16,_x,)(pg, op1, op2);
 }
@@ -814,7 +822,7 @@ svfloat16_t test_svdiv_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdiv_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svdiv_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f32,_x,)(pg, op1, op2);
 }
@@ -835,7 +843,7 @@ svfloat32_t test_svdiv_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdiv_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svdiv_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdiv,_n_f64,_x,)(pg, op1, op2);
 }
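
The second RUN line added to each of these files is the substance of the change: the same source must now also compile to assembly with only +sme (no +sve), which can only succeed once every intrinsic used in the file carries VerifyRuntimeMode. An approximate driver-level equivalent of that cc1 line, for trying it by hand (the triple and -march spelling here are assumptions, not taken from the patch):

  clang --target=aarch64-linux-gnu -march=armv9-a+sme -S -o /dev/null acle_sve_div.c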
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c
index d4d0364400e7d..5f9636d908d51 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_divr.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svdivr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svdivr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_s32,_z,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svint32_t test_svdivr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svdivr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svdivr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_s64,_z,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svint64_t test_svdivr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svdivr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svdivr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_u32,_z,)(pg, op1, op2);
 }
@@ -85,7 +93,7 @@ svuint32_t test_svdivr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svdivr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svdivr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_u64,_z,)(pg, op1, op2);
 }
@@ -102,7 +110,7 @@ svuint64_t test_svdivr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdivr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svdivr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_s32,_m,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svint32_t test_svdivr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdivr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svdivr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_s64,_m,)(pg, op1, op2);
 }
@@ -136,7 +144,7 @@ svint64_t test_svdivr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdivr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svdivr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_u32,_m,)(pg, op1, op2);
 }
@@ -153,7 +161,7 @@ svuint32_t test_svdivr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdivr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svdivr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_u64,_m,)(pg, op1, op2);
 }
@@ -170,7 +178,7 @@ svuint64_t test_svdivr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdivr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svdivr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_s32,_x,)(pg, op1, op2);
 }
@@ -187,7 +195,7 @@ svint32_t test_svdivr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdivr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svdivr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_s64,_x,)(pg, op1, op2);
 }
@@ -204,7 +212,7 @@ svint64_t test_svdivr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdivr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svdivr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_u32,_x,)(pg, op1, op2);
 }
@@ -221,7 +229,7 @@ svuint32_t test_svdivr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdivr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svdivr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_u64,_x,)(pg, op1, op2);
 }
@@ -244,7 +252,7 @@ svuint64_t test_svdivr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svdivr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svdivr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_s32,_z,)(pg, op1, op2);
 }
@@ -267,7 +275,7 @@ svint32_t test_svdivr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svdivr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svdivr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_s64,_z,)(pg, op1, op2);
 }
@@ -290,7 +298,7 @@ svint64_t test_svdivr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svdivr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svdivr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_u32,_z,)(pg, op1, op2);
 }
@@ -313,7 +321,7 @@ svuint32_t test_svdivr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svdivr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svdivr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_u64,_z,)(pg, op1, op2);
 }
@@ -334,7 +342,7 @@ svuint64_t test_svdivr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdivr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svdivr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_s32,_m,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svdivr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdivr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdivr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svdivr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_s64,_m,)(pg, op1, op2);
 }
@@ -376,7 +384,7 @@ svint64_t test_svdivr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdivr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svdivr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_u32,_m,)(pg, op1, op2);
 }
@@ -397,7 +405,7 @@ svuint32_t test_svdivr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udivr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdivr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svdivr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_u64,_m,)(pg, op1, op2);
 }
@@ -418,7 +426,7 @@ svuint64_t test_svdivr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdivr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svdivr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_s32,_x,)(pg, op1, op2);
 }
@@ -439,7 +447,7 @@ svint32_t test_svdivr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdivr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svdivr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_s64,_x,)(pg, op1, op2);
 }
@@ -460,7 +468,7 @@ svint64_t test_svdivr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdivr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svdivr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_u32,_x,)(pg, op1, op2);
 }
@@ -481,7 +489,7 @@ svuint32_t test_svdivr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udiv.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdivr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svdivr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_u64,_x,)(pg, op1, op2);
 }
@@ -500,7 +508,7 @@ svuint64_t test_svdivr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svdivr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svdivr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f16,_z,)(pg, op1, op2);
 }
@@ -519,7 +527,7 @@ svfloat16_t test_svdivr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svdivr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svdivr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f32,_z,)(pg, op1, op2);
 }
@@ -538,7 +546,7 @@ svfloat32_t test_svdivr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svdivr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svdivr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f64,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svfloat64_t test_svdivr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdivr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svdivr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f16,_m,)(pg, op1, op2);
 }
@@ -572,7 +580,7 @@ svfloat16_t test_svdivr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdivr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svdivr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f32,_m,)(pg, op1, op2);
 }
@@ -589,7 +597,7 @@ svfloat32_t test_svdivr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdivr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svdivr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f64,_m,)(pg, op1, op2);
 }
@@ -606,7 +614,7 @@ svfloat64_t test_svdivr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdivr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svdivr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f16,_x,)(pg, op1, op2);
 }
@@ -623,7 +631,7 @@ svfloat16_t test_svdivr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdivr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svdivr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f32,_x,)(pg, op1, op2);
 }
@@ -640,7 +648,7 @@ svfloat32_t test_svdivr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdivr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svdivr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_f64,_x,)(pg, op1, op2);
 }
@@ -663,7 +671,7 @@ svfloat64_t test_svdivr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svdivr_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svdivr_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f16,_z,)(pg, op1, op2);
 }
@@ -686,7 +694,7 @@ svfloat16_t test_svdivr_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svdivr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svdivr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f32,_z,)(pg, op1, op2);
 }
@@ -709,7 +717,7 @@ svfloat32_t test_svdivr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svdivr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svdivr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f64,_z,)(pg, op1, op2);
 }
@@ -730,7 +738,7 @@ svfloat64_t test_svdivr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdivr_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svdivr_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f16,_m,)(pg, op1, op2);
 }
@@ -751,7 +759,7 @@ svfloat16_t test_svdivr_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdivr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svdivr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f32,_m,)(pg, op1, op2);
 }
@@ -772,7 +780,7 @@ svfloat32_t test_svdivr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdivr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svdivr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f64,_m,)(pg, op1, op2);
 }
@@ -793,7 +801,7 @@ svfloat64_t test_svdivr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[DOTSPLAT]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdivr_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svdivr_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f16,_x,)(pg, op1, op2);
 }
@@ -814,7 +822,7 @@ svfloat16_t test_svdivr_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[DOTSPLAT]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdivr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svdivr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f32,_x,)(pg, op1, op2);
 }
@@ -835,7 +843,7 @@ svfloat32_t test_svdivr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[DOTSPLAT]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdivr_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svdivr_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdivr,_n_f64,_x,)(pg, op1, op2);
 }
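
The hunks above all follow one pattern: each test file gains a RUN line that builds with -target-feature +sme instead of +sve, and every test function is suffixed with a MODE_ATTR macro that expands to __arm_streaming when __ARM_FEATURE_SME is defined and to nothing otherwise. A minimal sketch of the pattern, reduced to a single hypothetical test function (test_mode_attr_pattern and the svadd_s32_m call are illustrative, not part of the patch):

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming   // +sme build: run the test body in streaming mode
#else
#define MODE_ATTR                   // +sve build: signature is unchanged
#endif

// With +sme (and no +sve), the streaming-compatible SVE builtins are
// presumably only accepted inside a streaming function, so the attribute
// is what lets the new RUN line compile without diagnostics.
svint32_t test_mode_attr_pattern(svbool_t pg, svint32_t a, svint32_t b) MODE_ATTR
{
  return svadd_s32_m(pg, a, b);
}
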
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
index 924dd8f0af262..3dcd25f8739ba 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dot.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svdot_s32(svint32_t op1, svint8_t op2, svint8_t op3)
+svint32_t test_svdot_s32(svint32_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot,_s32,,)(op1, op2, op3);
 }
@@ -39,7 +47,7 @@ svint32_t test_svdot_s32(svint32_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svdot_s64(svint64_t op1, svint16_t op2, svint16_t op3)
+svint64_t test_svdot_s64(svint64_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot,_s64,,)(op1, op2, op3);
 }
@@ -54,7 +62,7 @@ svint64_t test_svdot_s64(svint64_t op1, svint16_t op2, svint16_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svdot_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3)
+svuint32_t test_svdot_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot,_u32,,)(op1, op2, op3);
 }
@@ -69,7 +77,7 @@ svuint32_t test_svdot_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svdot_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3)
+svuint64_t test_svdot_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot,_u64,,)(op1, op2, op3);
 }
@@ -88,7 +96,7 @@ svuint64_t test_svdot_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svdot_n_s32(svint32_t op1, svint8_t op2, int8_t op3)
+svint32_t test_svdot_n_s32(svint32_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot,_n_s32,,)(op1, op2, op3);
 }
@@ -107,7 +115,7 @@ svint32_t test_svdot_n_s32(svint32_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svdot_n_s64(svint64_t op1, svint16_t op2, int16_t op3)
+svint64_t test_svdot_n_s64(svint64_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot,_n_s64,,)(op1, op2, op3);
 }
@@ -126,7 +134,7 @@ svint64_t test_svdot_n_s64(svint64_t op1, svint16_t op2, int16_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svdot_n_u32(svuint32_t op1, svuint8_t op2, uint8_t op3)
+svuint32_t test_svdot_n_u32(svuint32_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot,_n_u32,,)(op1, op2, op3);
 }
@@ -145,7 +153,7 @@ svuint32_t test_svdot_n_u32(svuint32_t op1, svuint8_t op2, uint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udot.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svdot_n_u64(svuint64_t op1, svuint16_t op2, uint16_t op3)
+svuint64_t test_svdot_n_u64(svuint64_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot,_n_u64,,)(op1, op2, op3);
 }
@@ -160,7 +168,7 @@ svuint64_t test_svdot_n_u64(svuint64_t op1, svuint16_t op2, uint16_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3)
+svint32_t test_svdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot_lane,_s32,,)(op1, op2, op3, 0);
 }
@@ -175,7 +183,7 @@ svint32_t test_svdot_lane_s32(svint32_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdot.lane.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svdot_lane_s32_1(svint32_t op1, svint8_t op2, svint8_t op3)
+svint32_t test_svdot_lane_s32_1(svint32_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot_lane,_s32,,)(op1, op2, op3, 3);
 }
@@ -190,7 +198,7 @@ svint32_t test_svdot_lane_s32_1(svint32_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svdot_lane_s64(svint64_t op1, svint16_t op2, svint16_t op3)
+svint64_t test_svdot_lane_s64(svint64_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot_lane,_s64,,)(op1, op2, op3, 0);
 }
@@ -205,7 +213,7 @@ svint64_t test_svdot_lane_s64(svint64_t op1, svint16_t op2, svint16_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sdot.lane.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svdot_lane_s64_1(svint64_t op1, svint16_t op2, svint16_t op3)
+svint64_t test_svdot_lane_s64_1(svint64_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot_lane,_s64,,)(op1, op2, op3, 1);
 }
@@ -220,7 +228,7 @@ svint64_t test_svdot_lane_s64_1(svint64_t op1, svint16_t op2, svint16_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udot.lane.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svdot_lane_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3)
+svuint32_t test_svdot_lane_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot_lane,_u32,,)(op1, op2, op3, 3);
 }
@@ -235,7 +243,7 @@ svuint32_t test_svdot_lane_u32(svuint32_t op1, svuint8_t op2, svuint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.udot.lane.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svdot_lane_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3)
+svuint64_t test_svdot_lane_u64(svuint64_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdot_lane,_u64,,)(op1, op2, op3, 1);
 }
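
For reference, the SVE_ACLE_FUNC macro that every test calls selects between the overloaded and the type-suffixed intrinsic name. The SVE_OVERLOADED_FORMS branch below is taken verbatim from these tests; the else branch is an assumption about the full macro (pasting all four arguments), shown only to make the expansion concrete:

#ifdef SVE_OVERLOADED_FORMS
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4   // assumed non-overloaded form
#endif

// SVE_ACLE_FUNC(svdot,_s32,,)(op1, op2, op3) expands to
//   svdot(op1, op2, op3)      with SVE_OVERLOADED_FORMS defined
//   svdot_s32(op1, op2, op3)  otherwise
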
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
index 3c459531b7ff2..2e58b503dd81d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 8 x bfloat> [[DOTSPLATINSERT]], <vscale x 8 x bfloat> poison, <vscale x 8 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svdup_n_bf16(bfloat16_t op) {
+svbfloat16_t test_svdup_n_bf16(bfloat16_t op) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16'}}

   return SVE_ACLE_FUNC(svdup, _n, _bf16, )(op);
 }
@@ -44,7 +52,7 @@ svbfloat16_t test_svdup_n_bf16(bfloat16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> [[TMP0]], bfloat [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op) {
+svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_z'}}
   return SVE_ACLE_FUNC(svdup, _n, _bf16_z, )(pg, op);
 }
@@ -61,7 +69,7 @@ svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op) {
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], bfloat [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op) {
+svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_m'}}
   return SVE_ACLE_FUNC(svdup, _n, _bf16_m, )(inactive, pg, op);
 }
@@ -78,7 +86,7 @@ svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> [[TMP0]], bfloat [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) {
+svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_x'}}
   return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op);
 }
@@ -97,7 +105,7 @@ svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svdup_lane_bf16(svbfloat16_t data, uint16_t index)
+svbfloat16_t test_svdup_lane_bf16(svbfloat16_t data, uint16_t index) MODE_ATTR
 {
   // expected-warning@+1 {{implicit declaration of function 'svdup_lane_bf16'}}
   return SVE_ACLE_FUNC(svdup_lane,_bf16,,)(data, index);
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c
index 5294ccbc4ef50..fec77ceb463ff 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svdup_n_s8(int8_t op)
+svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s8,)(op);
 }
@@ -43,7 +51,7 @@ svint8_t test_svdup_n_s8(int8_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svdup_n_s16(int16_t op)
+svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s16,)(op);
 }
@@ -60,7 +68,7 @@ svint16_t test_svdup_n_s16(int16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svdup_n_s32(int32_t op)
+svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s32,)(op);
 }
@@ -77,7 +85,7 @@ svint32_t test_svdup_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svdup_n_s64(int64_t op)
+svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s64,)(op);
 }
@@ -94,7 +102,7 @@ svint64_t test_svdup_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svdup_n_u8(uint8_t op)
+svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u8,)(op);
 }
@@ -111,7 +119,7 @@ svuint8_t test_svdup_n_u8(uint8_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svdup_n_u16(uint16_t op)
+svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u16,)(op);
 }
@@ -128,7 +136,7 @@ svuint16_t test_svdup_n_u16(uint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svdup_n_u32(uint32_t op)
+svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u32,)(op);
 }
@@ -145,7 +153,7 @@ svuint32_t test_svdup_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svdup_n_u64(uint64_t op)
+svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u64,)(op);
 }
@@ -162,7 +170,7 @@ svuint64_t test_svdup_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svdup_n_f16(float16_t op)
+svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f16,)(op);
 }
@@ -179,7 +187,7 @@ svfloat16_t test_svdup_n_f16(float16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svdup_n_f32(float32_t op)
+svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f32,)(op);
 }
@@ -196,7 +204,7 @@ svfloat32_t test_svdup_n_f32(float32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svdup_n_f64(float64_t op)
+svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f64,)(op);
 }
@@ -211,7 +219,7 @@ svfloat64_t test_svdup_n_f64(float64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], i8 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op)
+svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s8_z,)(pg, op);
 }
@@ -228,7 +236,7 @@ svint8_t test_svdup_n_s8_z(svbool_t pg, int8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], i16 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svdup_n_s16_z(svbool_t pg, int16_t op)
+svint16_t test_svdup_n_s16_z(svbool_t pg, int16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s16_z,)(pg, op);
 }
@@ -245,7 +253,7 @@ svint16_t test_svdup_n_s16_z(svbool_t pg, int16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], i32 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdup_n_s32_z(svbool_t pg, int32_t op)
+svint32_t test_svdup_n_s32_z(svbool_t pg, int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s32_z,)(pg, op);
 }
@@ -262,7 +270,7 @@ svint32_t test_svdup_n_s32_z(svbool_t pg, int32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], i64 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdup_n_s64_z(svbool_t pg, int64_t op)
+svint64_t test_svdup_n_s64_z(svbool_t pg, int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s64_z,)(pg, op);
 }
@@ -277,7 +285,7 @@ svint64_t test_svdup_n_s64_z(svbool_t pg, int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], i8 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svdup_n_u8_z(svbool_t pg, uint8_t op)
+svuint8_t test_svdup_n_u8_z(svbool_t pg, uint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u8_z,)(pg, op);
 }
@@ -294,7 +302,7 @@ svuint8_t test_svdup_n_u8_z(svbool_t pg, uint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], i16 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svdup_n_u16_z(svbool_t pg, uint16_t op)
+svuint16_t test_svdup_n_u16_z(svbool_t pg, uint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u16_z,)(pg, op);
 }
@@ -311,7 +319,7 @@ svuint16_t test_svdup_n_u16_z(svbool_t pg, uint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], i32 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdup_n_u32_z(svbool_t pg, uint32_t op)
+svuint32_t test_svdup_n_u32_z(svbool_t pg, uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u32_z,)(pg, op);
 }
@@ -328,7 +336,7 @@ svuint32_t test_svdup_n_u32_z(svbool_t pg, uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], i64 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdup_n_u64_z(svbool_t pg, uint64_t op)
+svuint64_t test_svdup_n_u64_z(svbool_t pg, uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u64_z,)(pg, op);
 }
@@ -345,7 +353,7 @@ svuint64_t test_svdup_n_u64_z(svbool_t pg, uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], half [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdup_n_f16_z(svbool_t pg, float16_t op)
+svfloat16_t test_svdup_n_f16_z(svbool_t pg, float16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f16_z,)(pg, op);
 }
@@ -362,7 +370,7 @@ svfloat16_t test_svdup_n_f16_z(svbool_t pg, float16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], float [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdup_n_f32_z(svbool_t pg, float32_t op)
+svfloat32_t test_svdup_n_f32_z(svbool_t pg, float32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f32_z,)(pg, op);
 }
@@ -379,7 +387,7 @@ svfloat32_t test_svdup_n_f32_z(svbool_t pg, float32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], double [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdup_n_f64_z(svbool_t pg, float64_t op)
+svfloat64_t test_svdup_n_f64_z(svbool_t pg, float64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f64_z,)(pg, op);
 }
@@ -394,7 +402,7 @@ svfloat64_t test_svdup_n_f64_z(svbool_t pg, float64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svdup_n_s8_m(svint8_t inactive, svbool_t pg, int8_t op)
+svint8_t test_svdup_n_s8_m(svint8_t inactive, svbool_t pg, int8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s8_m,)(inactive, pg, op);
 }
@@ -411,7 +419,7 @@ svint8_t test_svdup_n_s8_m(svint8_t inactive, svbool_t pg, int8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], i16 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svdup_n_s16_m(svint16_t inactive, svbool_t pg, int16_t op)
+svint16_t test_svdup_n_s16_m(svint16_t inactive, svbool_t pg, int16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s16_m,)(inactive, pg, op);
 }
@@ -428,7 +436,7 @@ svint16_t test_svdup_n_s16_m(svint16_t inactive, svbool_t pg, int16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], i32 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdup_n_s32_m(svint32_t inactive, svbool_t pg, int32_t op)
+svint32_t test_svdup_n_s32_m(svint32_t inactive, svbool_t pg, int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s32_m,)(inactive, pg, op);
 }
@@ -445,7 +453,7 @@ svint32_t test_svdup_n_s32_m(svint32_t inactive, svbool_t pg, int32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], i64 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdup_n_s64_m(svint64_t inactive, svbool_t pg, int64_t op)
+svint64_t test_svdup_n_s64_m(svint64_t inactive, svbool_t pg, int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s64_m,)(inactive, pg, op);
 }
@@ -460,7 +468,7 @@ svint64_t test_svdup_n_s64_m(svint64_t inactive, svbool_t pg, int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], i8 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svdup_n_u8_m(svuint8_t inactive, svbool_t pg, uint8_t op)
+svuint8_t test_svdup_n_u8_m(svuint8_t inactive, svbool_t pg, uint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u8_m,)(inactive, pg, op);
 }
@@ -477,7 +485,7 @@ svuint8_t test_svdup_n_u8_m(svuint8_t inactive, svbool_t pg, uint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], i16 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svdup_n_u16_m(svuint16_t inactive, svbool_t pg, uint16_t op)
+svuint16_t test_svdup_n_u16_m(svuint16_t inactive, svbool_t pg, uint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u16_m,)(inactive, pg, op);
 }
@@ -494,7 +502,7 @@ svuint16_t test_svdup_n_u16_m(svuint16_t inactive, svbool_t pg, uint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], i32 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdup_n_u32_m(svuint32_t inactive, svbool_t pg, uint32_t op)
+svuint32_t test_svdup_n_u32_m(svuint32_t inactive, svbool_t pg, uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u32_m,)(inactive, pg, op);
 }
@@ -511,7 +519,7 @@ svuint32_t test_svdup_n_u32_m(svuint32_t inactive, svbool_t pg, uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], i64 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdup_n_u64_m(svuint64_t inactive, svbool_t pg, uint64_t op)
+svuint64_t test_svdup_n_u64_m(svuint64_t inactive, svbool_t pg, uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u64_m,)(inactive, pg, op);
 }
@@ -528,7 +536,7 @@ svuint64_t test_svdup_n_u64_m(svuint64_t inactive, svbool_t pg, uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], half [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdup_n_f16_m(svfloat16_t inactive, svbool_t pg, float16_t op)
+svfloat16_t test_svdup_n_f16_m(svfloat16_t inactive, svbool_t pg, float16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f16_m,)(inactive, pg, op);
 }
@@ -545,7 +553,7 @@ svfloat16_t test_svdup_n_f16_m(svfloat16_t inactive, svbool_t pg, float16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], float [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdup_n_f32_m(svfloat32_t inactive, svbool_t pg, float32_t op)
+svfloat32_t test_svdup_n_f32_m(svfloat32_t inactive, svbool_t pg, float32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f32_m,)(inactive, pg, op);
 }
@@ -562,7 +570,7 @@ svfloat32_t test_svdup_n_f32_m(svfloat32_t inactive, svbool_t pg, float32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], double [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdup_n_f64_m(svfloat64_t inactive, svbool_t pg, float64_t op)
+svfloat64_t test_svdup_n_f64_m(svfloat64_t inactive, svbool_t pg, float64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f64_m,)(inactive, pg, op);
 }
@@ -577,7 +585,7 @@ svfloat64_t test_svdup_n_f64_m(svfloat64_t inactive, svbool_t pg, float64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], i8 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svdup_n_s8_x(svbool_t pg, int8_t op)
+svint8_t test_svdup_n_s8_x(svbool_t pg, int8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s8_x,)(pg, op);
 }
@@ -594,7 +602,7 @@ svint8_t test_svdup_n_s8_x(svbool_t pg, int8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], i16 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svdup_n_s16_x(svbool_t pg, int16_t op)
+svint16_t test_svdup_n_s16_x(svbool_t pg, int16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s16_x,)(pg, op);
 }
@@ -611,7 +619,7 @@ svint16_t test_svdup_n_s16_x(svbool_t pg, int16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], i32 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svdup_n_s32_x(svbool_t pg, int32_t op)
+svint32_t test_svdup_n_s32_x(svbool_t pg, int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s32_x,)(pg, op);
 }
@@ -628,7 +636,7 @@ svint32_t test_svdup_n_s32_x(svbool_t pg, int32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], i64 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svdup_n_s64_x(svbool_t pg, int64_t op)
+svint64_t test_svdup_n_s64_x(svbool_t pg, int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_s64_x,)(pg, op);
 }
@@ -643,7 +651,7 @@ svint64_t test_svdup_n_s64_x(svbool_t pg, int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], i8 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svdup_n_u8_x(svbool_t pg, uint8_t op)
+svuint8_t test_svdup_n_u8_x(svbool_t pg, uint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u8_x,)(pg, op);
 }
@@ -660,7 +668,7 @@ svuint8_t test_svdup_n_u8_x(svbool_t pg, uint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], i16 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svdup_n_u16_x(svbool_t pg, uint16_t op)
+svuint16_t test_svdup_n_u16_x(svbool_t pg, uint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u16_x,)(pg, op);
 }
@@ -677,7 +685,7 @@ svuint16_t test_svdup_n_u16_x(svbool_t pg, uint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], i32 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svdup_n_u32_x(svbool_t pg, uint32_t op)
+svuint32_t test_svdup_n_u32_x(svbool_t pg, uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u32_x,)(pg, op);
 }
@@ -694,7 +702,7 @@ svuint32_t test_svdup_n_u32_x(svbool_t pg, uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], i64 [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svdup_n_u64_x(svbool_t pg, uint64_t op)
+svuint64_t test_svdup_n_u64_x(svbool_t pg, uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_u64_x,)(pg, op);
 }
@@ -711,7 +719,7 @@ svuint64_t test_svdup_n_u64_x(svbool_t pg, uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], half [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svdup_n_f16_x(svbool_t pg, float16_t op)
+svfloat16_t test_svdup_n_f16_x(svbool_t pg, float16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f16_x,)(pg, op);
 }
@@ -728,7 +736,7 @@ svfloat16_t test_svdup_n_f16_x(svbool_t pg, float16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], float [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svdup_n_f32_x(svbool_t pg, float32_t op)
+svfloat32_t test_svdup_n_f32_x(svbool_t pg, float32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f32_x,)(pg, op);
 }
@@ -745,7 +753,7 @@ svfloat32_t test_svdup_n_f32_x(svbool_t pg, float32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], double [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svdup_n_f64_x(svbool_t pg, float64_t op)
+svfloat64_t test_svdup_n_f64_x(svbool_t pg, float64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_f64_x,)(pg, op);
 }
@@ -764,7 +772,7 @@ svfloat64_t test_svdup_n_f64_x(svbool_t pg, float64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svdup_lane_s8(svint8_t data, uint8_t index)
+svint8_t test_svdup_lane_s8(svint8_t data, uint8_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_s8,,)(data, index);
 }
@@ -783,7 +791,7 @@ svint8_t test_svdup_lane_s8(svint8_t data, uint8_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svdup_lane_s16(svint16_t data, uint16_t index)
+svint16_t test_svdup_lane_s16(svint16_t data, uint16_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_s16,,)(data, index);
 }
@@ -802,7 +810,7 @@ svint16_t test_svdup_lane_s16(svint16_t data, uint16_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svdup_lane_s32(svint32_t data, uint32_t index)
+svint32_t test_svdup_lane_s32(svint32_t data, uint32_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_s32,,)(data, index);
 }
@@ -821,7 +829,7 @@ svint32_t test_svdup_lane_s32(svint32_t data, uint32_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svdup_lane_s64(svint64_t data, uint64_t index)
+svint64_t test_svdup_lane_s64(svint64_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_s64,,)(data, index);
 }
@@ -840,7 +848,7 @@ svint64_t test_svdup_lane_s64(svint64_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svdup_lane_u8(svuint8_t data, uint8_t index)
+svuint8_t test_svdup_lane_u8(svuint8_t data, uint8_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_u8,,)(data, index);
 }
@@ -859,7 +867,7 @@ svuint8_t test_svdup_lane_u8(svuint8_t data, uint8_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svdup_lane_u16(svuint16_t data, uint16_t index)
+svuint16_t test_svdup_lane_u16(svuint16_t data, uint16_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_u16,,)(data, index);
 }
@@ -878,7 +886,7 @@ svuint16_t test_svdup_lane_u16(svuint16_t data, uint16_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svdup_lane_u32(svuint32_t data, uint32_t index)
+svuint32_t test_svdup_lane_u32(svuint32_t data, uint32_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_u32,,)(data, index);
 }
@@ -897,7 +905,7 @@ svuint32_t test_svdup_lane_u32(svuint32_t data, uint32_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svdup_lane_u64(svuint64_t data, uint64_t index)
+svuint64_t test_svdup_lane_u64(svuint64_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_u64,,)(data, index);
 }
@@ -916,7 +924,7 @@ svuint64_t test_svdup_lane_u64(svuint64_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svdup_lane_f16(svfloat16_t data, uint16_t index)
+svfloat16_t test_svdup_lane_f16(svfloat16_t data, uint16_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_f16,,)(data, index);
 }
@@ -935,7 +943,7 @@ svfloat16_t test_svdup_lane_f16(svfloat16_t data, uint16_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svdup_lane_f32(svfloat32_t data, uint32_t index)
+svfloat32_t test_svdup_lane_f32(svfloat32_t data, uint32_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_f32,,)(data, index);
 }
@@ -954,7 +962,7 @@ svfloat32_t test_svdup_lane_f32(svfloat32_t data, uint32_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svdup_lane_f64(svfloat64_t data, uint64_t index)
+svfloat64_t test_svdup_lane_f64(svfloat64_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup_lane,_f64,,)(data, index);
 }
@@ -971,7 +979,7 @@ svfloat64_t test_svdup_lane_f64(svfloat64_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i1> [[DOTSPLATINSERT]], <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[DOTSPLAT]]
 //
-svbool_t test_svdup_n_b8(bool op)
+svbool_t test_svdup_n_b8(bool op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_b8,)(op);
 }
@@ -990,7 +998,7 @@ svbool_t test_svdup_n_b8(bool op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svdup_n_b16(bool op)
+svbool_t test_svdup_n_b16(bool op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_b16,)(op);
 }
@@ -1009,7 +1017,7 @@ svbool_t test_svdup_n_b16(bool op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svdup_n_b32(bool op)
+svbool_t test_svdup_n_b32(bool op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_b32,)(op);
 }
@@ -1028,7 +1036,7 @@ svbool_t test_svdup_n_b32(bool op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svdup_n_b64(bool op)
+svbool_t test_svdup_n_b64(bool op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdup,_n,_b64,)(op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
index 6534beaf59d26..45e30aa20f29a 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index) {
+svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index) MODE_ATTR {
  // expected-warning@+1 {{implicit declaration of function 'svdupq_lane_bf16'}}
   return SVE_ACLE_FUNC(svdupq_lane, _bf16, , )(data, index);
 }
@@ -58,7 +66,7 @@ svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index) {
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP9]]
 //
 svbfloat16_t test_svdupq_n_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3,
-                                bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7) {
+                                bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7) MODE_ATTR {
   // <assume other insertelement>
  // expected-warning@+1 {{implicit declaration of function 'svdupq_n_bf16'}}
   return SVE_ACLE_FUNC(svdupq, _n, _bf16, )(x0, x1, x2, x3, x4, x5, x6, x7);
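
For illustration (this snippet is not part of the patch): the MODE_ATTR idiom above is what lets a single test source drive both RUN configurations. Under +sve the macro expands to nothing and each test is an ordinary function; under +sme it expands to __arm_streaming, putting each test in streaming mode, where these compatible SVE intrinsics remain available. A minimal sketch of the same idiom in user code, with the function name xor_masked invented for the example:

#include <arm_sve.h>

#if defined(__ARM_FEATURE_SME)
#define MODE_ATTR __arm_streaming   /* +sme only: run in streaming mode */
#else
#define MODE_ATTR                   /* +sve: plain non-streaming function */
#endif

/* Hypothetical user function, not taken from the patch. The intrinsic
   sveor_s32_z is one of the VerifyRuntimeMode intrinsics exercised by
   the tests below, so it is expected to be legal in either mode. */
svint32_t xor_masked(svbool_t pg, svint32_t a, svint32_t b) MODE_ATTR {
  return sveor_s32_z(pg, a, b);
}
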
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
index 9c3f4420d449b..5671383dc7339 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svdupq_lane_s8(svint8_t data, uint64_t index)
+svint8_t test_svdupq_lane_s8(svint8_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_s8,,)(data, index);
 }
@@ -39,7 +47,7 @@ svint8_t test_svdupq_lane_s8(svint8_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svdupq_lane_s16(svint16_t data, uint64_t index)
+svint16_t test_svdupq_lane_s16(svint16_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_s16,,)(data, index);
 }
@@ -54,7 +62,7 @@ svint16_t test_svdupq_lane_s16(svint16_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svdupq_lane_s32(svint32_t data, uint64_t index)
+svint32_t test_svdupq_lane_s32(svint32_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_s32,,)(data, index);
 }
@@ -69,7 +77,7 @@ svint32_t test_svdupq_lane_s32(svint32_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svdupq_lane_s64(svint64_t data, uint64_t index)
+svint64_t test_svdupq_lane_s64(svint64_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_s64,,)(data, index);
 }
@@ -84,7 +92,7 @@ svint64_t test_svdupq_lane_s64(svint64_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svdupq_lane_u8(svuint8_t data, uint64_t index)
+svuint8_t test_svdupq_lane_u8(svuint8_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_u8,,)(data, index);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svdupq_lane_u8(svuint8_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svdupq_lane_u16(svuint16_t data, uint64_t index)
+svuint16_t test_svdupq_lane_u16(svuint16_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_u16,,)(data, index);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svdupq_lane_u16(svuint16_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svdupq_lane_u32(svuint32_t data, uint64_t index)
+svuint32_t test_svdupq_lane_u32(svuint32_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_u32,,)(data, index);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svdupq_lane_u32(svuint32_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svdupq_lane_u64(svuint64_t data, uint64_t index)
+svuint64_t test_svdupq_lane_u64(svuint64_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_u64,,)(data, index);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svdupq_lane_u64(svuint64_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svdupq_lane_f16(svfloat16_t data, uint64_t index)
+svfloat16_t test_svdupq_lane_f16(svfloat16_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_f16,,)(data, index);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svdupq_lane_f16(svfloat16_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svdupq_lane_f32(svfloat32_t data, uint64_t index)
+svfloat32_t test_svdupq_lane_f32(svfloat32_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_f32,,)(data, index);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svdupq_lane_f32(svfloat32_t data, uint64_t index)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], i64 [[INDEX:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svdupq_lane_f64(svfloat64_t data, uint64_t index)
+svfloat64_t test_svdupq_lane_f64(svfloat64_t data, uint64_t index) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq_lane,_f64,,)(data, index);
 }
@@ -226,7 +234,7 @@ svfloat64_t test_svdupq_lane_f64(svfloat64_t data, uint64_t index)
 svint8_t test_svdupq_n_s8(int8_t x0, int8_t x1, int8_t x2, int8_t x3,
                           int8_t x4, int8_t x5, int8_t x6, int8_t x7,
                           int8_t x8, int8_t x9, int8_t x10, int8_t x11,
-                          int8_t x12, int8_t x13, int8_t x14, int8_t x15)
+                          int8_t x12, int8_t x13, int8_t x14, int8_t x15) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_s8,)(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
@@ -261,7 +269,7 @@ svint8_t test_svdupq_n_s8(int8_t x0, int8_t x1, int8_t x2, int8_t x3,
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP9]]
 //
 svint16_t test_svdupq_n_s16(int16_t x0, int16_t x1, int16_t x2, int16_t x3,
-                            int16_t x4, int16_t x5, int16_t x6, int16_t x7)
+                            int16_t x4, int16_t x5, int16_t x6, int16_t x7) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_s16,)(x0, x1, x2, x3, x4, x5, x6, x7);
@@ -287,7 +295,7 @@ svint16_t test_svdupq_n_s16(int16_t x0, int16_t x1, int16_t x2, int16_t x3,
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP4]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svint32_t test_svdupq_n_s32(int32_t x0, int32_t x1, int32_t x2, int32_t x3)
+svint32_t test_svdupq_n_s32(int32_t x0, int32_t x1, int32_t x2, int32_t x3) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_s32,)(x0, x1, x2, x3);
@@ -309,7 +317,7 @@ svint32_t test_svdupq_n_s32(int32_t x0, int32_t x1, int32_t x2, int32_t x3)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
 //
-svint64_t test_svdupq_n_s64(int64_t x0, int64_t x1)
+svint64_t test_svdupq_n_s64(int64_t x0, int64_t x1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq,_n,_s64,)(x0, x1);
 }
@@ -361,7 +369,7 @@ svint64_t test_svdupq_n_s64(int64_t x0, int64_t x1)
 svuint8_t test_svdupq_n_u8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3,
                            uint8_t x4, uint8_t x5, uint8_t x6, uint8_t x7,
                            uint8_t x8, uint8_t x9, uint8_t x10, uint8_t x11,
-                           uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15)
+                           uint8_t x12, uint8_t x13, uint8_t x14, uint8_t x15) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_u8,)(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
@@ -396,7 +404,7 @@ svuint8_t test_svdupq_n_u8(uint8_t x0, uint8_t x1, uint8_t x2, uint8_t x3,
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP9]]
 //
 svuint16_t test_svdupq_n_u16(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3,
-                             uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7)
+                             uint16_t x4, uint16_t x5, uint16_t x6, uint16_t x7) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_u16,)(x0, x1, x2, x3, x4, x5, x6, x7);
@@ -422,7 +430,7 @@ svuint16_t test_svdupq_n_u16(uint16_t x0, uint16_t x1, uint16_t x2, uint16_t x3,
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> [[TMP4]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svuint32_t test_svdupq_n_u32(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3)
+svuint32_t test_svdupq_n_u32(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_u32,)(x0, x1, x2, x3);
@@ -444,7 +452,7 @@ svuint32_t test_svdupq_n_u32(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> [[TMP2]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP3]]
 //
-svuint64_t test_svdupq_n_u64(uint64_t x0, uint64_t x1)
+svuint64_t test_svdupq_n_u64(uint64_t x0, uint64_t x1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq,_n,_u64,)(x0, x1);
 }
@@ -478,7 +486,7 @@ svuint64_t test_svdupq_n_u64(uint64_t x0, uint64_t x1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP9]]
 //
 svfloat16_t test_svdupq_n_f16(float16_t x0, float16_t x1, float16_t x2, float16_t x3,
-                              float16_t x4, float16_t x5, float16_t x6, float16_t x7)
+                              float16_t x4, float16_t x5, float16_t x6, float16_t x7) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_f16,)(x0, x1, x2, x3, x4, x5, x6, x7);
@@ -504,7 +512,7 @@ svfloat16_t test_svdupq_n_f16(float16_t x0, float16_t x1, float16_t x2, float16_
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> [[TMP4]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP5]]
 //
-svfloat32_t test_svdupq_n_f32(float32_t x0, float32_t x1, float32_t x2, float32_t x3)
+svfloat32_t test_svdupq_n_f32(float32_t x0, float32_t x1, float32_t x2, float32_t x3) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_f32,)(x0, x1, x2, x3);
@@ -526,7 +534,7 @@ svfloat32_t test_svdupq_n_f32(float32_t x0, float32_t x1, float32_t x2, float32_
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> [[TMP2]], i64 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP3]]
 //
-svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1)
+svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq,_n,_f64,)(x0, x1);
 }
@@ -678,7 +686,7 @@ svfloat64_t test_svdupq_n_f64(float64_t x0, float64_t x1)
 svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3,
                           bool x4, bool x5, bool x6, bool x7,
                           bool x8, bool x9, bool x10, bool x11,
-                          bool x12, bool x13, bool x14, bool x15)
+                          bool x12, bool x13, bool x14, bool x15) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_b8,)(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15);
@@ -767,7 +775,7 @@ svbool_t test_svdupq_n_b8(bool x0, bool x1, bool x2, bool x3,
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP20]]
 //
 svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3,
-                           bool x4, bool x5, bool x6, bool x7)
+                           bool x4, bool x5, bool x6, bool x7) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_b16,)(x0, x1, x2, x3, x4, x5, x6, x7);
@@ -823,7 +831,7 @@ svbool_t test_svdupq_n_b16(bool x0, bool x1, bool x2, bool x3,
 // CPP-CHECK-NEXT:    [[TMP12:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP11]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP12]]
 //
-svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3)
+svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3) MODE_ATTR
 {
   // <assume other insertelement>
   return SVE_ACLE_FUNC(svdupq,_n,_b32,)(x0, x1, x2, x3);
@@ -863,7 +871,7 @@ svbool_t test_svdupq_n_b32(bool x0, bool x1, bool x2, bool x3)
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP7]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP8]]
 //
-svbool_t test_svdupq_n_b64(bool x0, bool x1)
+svbool_t test_svdupq_n_b64(bool x0, bool x1) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svdupq,_n,_b64,)(x0, x1);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c
index e39012eaed1fa..b0107da22fd7e 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eor.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_sveor_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_sveor_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_sveor_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_sveor_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_sveor_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_sveor_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_sveor_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_sveor_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_sveor_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_sveor_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_sveor_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_sveor_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_sveor_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_sveor_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_sveor_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_sveor_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_sveor_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_sveor_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_sveor_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_sveor_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_sveor_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_sveor_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_sveor_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_sveor_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_sveor_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_sveor_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_sveor_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_sveor_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_sveor_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_sveor_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_sveor_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_sveor_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_sveor_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_sveor_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_sveor_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_sveor_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_sveor_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_sveor_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_sveor_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_sveor_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_sveor_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_sveor_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_sveor_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_sveor_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_sveor_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_sveor_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_sveor_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_sveor_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_sveor_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_sveor_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_sveor_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_sveor_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_sveor_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_sveor_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_sveor_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_sveor_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_sveor_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_sveor_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_sveor_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_sveor_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_sveor_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_sveor_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_sveor_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_sveor_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_sveor_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_sveor_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_sveor_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_sveor_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_sveor_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_sveor_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_sveor_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_sveor_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_sveor_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_sveor_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_sveor_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_sveor_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_sveor_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_sveor_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_sveor_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_sveor_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_sveor_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_sveor_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_sveor_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_sveor_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_sveor_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_sveor_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_sveor_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_sveor_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_sveor_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_sveor_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_sveor_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_sveor_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_sveor_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_sveor_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_sveor_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_sveor_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_sveor_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_sveor_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_sveor_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_sveor_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_sveor_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_sveor_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_sveor_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_sveor_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_sveor_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_sveor_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_sveor_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_sveor_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_sveor_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_sveor_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_sveor_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_sveor_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_sveor_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_sveor_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_sveor_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_sveor_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_sveor_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_sveor_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_sveor_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_sveor_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_sveor_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_sveor_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_sveor_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_sveor_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_sveor_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_sveor_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_sveor_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_sveor_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_sveor_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_sveor_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_sveor_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_sveor_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_sveor_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_sveor_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_sveor_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_sveor_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_sveor_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_sveor_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_sveor_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_sveor_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_sveor_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_sveor_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_sveor_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_n_u64,_x,)(pg, op1, op2);
 }
@@ -944,7 +952,7 @@ svuint64_t test_sveor_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_sveor_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_sveor_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveor,_b,_z,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c
index 98748dade7bb8..ab2ddcd0a7dae 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_eorv.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_sveorv_s8(svbool_t pg, svint8_t op)
+int8_t test_sveorv_s8(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveorv,_s8,,)(pg, op);
 }
@@ -41,7 +49,7 @@ int8_t test_sveorv_s8(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-int16_t test_sveorv_s16(svbool_t pg, svint16_t op)
+int16_t test_sveorv_s16(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveorv,_s16,,)(pg, op);
 }
@@ -58,7 +66,7 @@ int16_t test_sveorv_s16(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_sveorv_s32(svbool_t pg, svint32_t op)
+int32_t test_sveorv_s32(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveorv,_s32,,)(pg, op);
 }
@@ -75,7 +83,7 @@ int32_t test_sveorv_s32(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_sveorv_s64(svbool_t pg, svint64_t op)
+int64_t test_sveorv_s64(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveorv,_s64,,)(pg, op);
 }
@@ -90,7 +98,7 @@ int64_t test_sveorv_s64(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.eorv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_sveorv_u8(svbool_t pg, svuint8_t op)
+uint8_t test_sveorv_u8(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveorv,_u8,,)(pg, op);
 }
@@ -107,7 +115,7 @@ uint8_t test_sveorv_u8(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.eorv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-uint16_t test_sveorv_u16(svbool_t pg, svuint16_t op)
+uint16_t test_sveorv_u16(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveorv,_u16,,)(pg, op);
 }
@@ -124,7 +132,7 @@ uint16_t test_sveorv_u16(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.eorv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_sveorv_u32(svbool_t pg, svuint32_t op)
+uint32_t test_sveorv_u32(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveorv,_u32,,)(pg, op);
 }
@@ -141,7 +149,7 @@ uint32_t test_sveorv_u32(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.eorv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_sveorv_u64(svbool_t pg, svuint64_t op)
+uint64_t test_sveorv_u64(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(sveorv,_u64,,)(pg, op);
 }
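
As a further aside (again not part of the patch): the new +sme-only RUN lines are the positive half of the story. The streaming attribute is what they rely on; a sketch of the failure mode they guard against is below, where streaming_ok and reduce_wrong_mode are invented names and the exact diagnostic wording is not claimed here:

#include <arm_sve.h>

/* With only -target-feature +sme (no +sve), the intrinsic is expected
   to be accepted in a streaming function ... */
uint64_t streaming_ok(svbool_t pg, svuint64_t op) __arm_streaming {
  return sveorv_u64(pg, op);
}

/* ... but diagnosed in a non-streaming one, since without +sve the
   SVE intrinsics are only available in streaming mode. */
uint64_t reduce_wrong_mode(svbool_t pg, svuint64_t op) {
  return sveorv_u64(pg, op); /* error expected under +sme,+nosve */
}
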
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext-bfloat.c
index ca25257996982..69f7c059527d4 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext-bfloat.c
@@ -4,9 +4,17 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ext.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], i32 127)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svext_bf16(svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svext_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
  // expected-warning@+1 {{implicit declaration of function 'svext_bf16'}}
   return SVE_ACLE_FUNC(svext,_bf16,,)(op1, op2, 127);
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext.c
index 1ccfa8ffd8fc2..e5b8e7e8a270c 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ext.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svext_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svext_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_s8,,)(op1, op2, 0);
 }
@@ -39,7 +47,7 @@ svint8_t test_svext_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 255)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svext_s8_1(svint8_t op1, svint8_t op2)
+svint8_t test_svext_s8_1(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_s8,,)(op1, op2, 255);
 }
@@ -54,7 +62,7 @@ svint8_t test_svext_s8_1(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svext_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svext_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_s16,,)(op1, op2, 0);
 }
@@ -69,7 +77,7 @@ svint16_t test_svext_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], i32 127)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svext_s16_1(svint16_t op1, svint16_t op2)
+svint16_t test_svext_s16_1(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_s16,,)(op1, op2, 127);
 }
@@ -84,7 +92,7 @@ svint16_t test_svext_s16_1(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svext_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svext_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_s32,,)(op1, op2, 0);
 }
@@ -99,7 +107,7 @@ svint32_t test_svext_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], i32 63)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svext_s32_1(svint32_t op1, svint32_t op2)
+svint32_t test_svext_s32_1(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_s32,,)(op1, op2, 63);
 }
@@ -114,7 +122,7 @@ svint32_t test_svext_s32_1(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svext_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svext_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_s64,,)(op1, op2, 0);
 }
@@ -129,7 +137,7 @@ svint64_t test_svext_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i32 31)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svext_s64_1(svint64_t op1, svint64_t op2)
+svint64_t test_svext_s64_1(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_s64,,)(op1, op2, 31);
 }
@@ -144,7 +152,7 @@ svint64_t test_svext_s64_1(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ext.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], i32 255)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svext_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svext_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_u8,,)(op1, op2, 255);
 }
@@ -159,7 +167,7 @@ svuint8_t test_svext_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ext.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], i32 127)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svext_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svext_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_u16,,)(op1, op2, 127);
 }
@@ -174,7 +182,7 @@ svuint16_t test_svext_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ext.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], i32 63)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svext_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svext_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_u32,,)(op1, op2, 63);
 }
@@ -189,7 +197,7 @@ svuint32_t test_svext_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ext.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i32 31)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svext_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svext_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_u64,,)(op1, op2, 31);
 }
@@ -204,7 +212,7 @@ svuint64_t test_svext_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ext.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], i32 127)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svext_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svext_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_f16,,)(op1, op2, 127);
 }
@@ -219,7 +227,7 @@ svfloat16_t test_svext_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ext.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], i32 63)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svext_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svext_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_f32,,)(op1, op2, 63);
 }
@@ -234,7 +242,7 @@ svfloat32_t test_svext_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ext.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], i32 31)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svext_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svext_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svext,_f64,,)(op1, op2, 31);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c
index e9080bc0982fb..736b022905d0f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svextb_s16_z(svbool_t pg, svint16_t op)
+svint16_t test_svextb_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svint16_t test_svextb_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svextb_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svextb_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svint32_t test_svextb_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svextb_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svextb_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svint64_t test_svextb_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svextb_u16_z(svbool_t pg, svuint16_t op)
+svuint16_t test_svextb_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u16,_z,)(pg, op);
 }
@@ -94,7 +102,7 @@ svuint16_t test_svextb_u16_z(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svextb_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svextb_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u32,_z,)(pg, op);
 }
@@ -111,7 +119,7 @@ svuint32_t test_svextb_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svextb_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svextb_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u64,_z,)(pg, op);
 }
@@ -128,7 +136,7 @@ svuint64_t test_svextb_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svextb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
+svint16_t test_svextb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s16,_m,)(inactive, pg, op);
 }
@@ -145,7 +153,7 @@ svint16_t test_svextb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svextb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svextb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s32,_m,)(inactive, pg, op);
 }
@@ -162,7 +170,7 @@ svint32_t test_svextb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svextb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svextb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s64,_m,)(inactive, pg, op);
 }
@@ -179,7 +187,7 @@ svint64_t test_svextb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svextb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
+svuint16_t test_svextb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u16,_m,)(inactive, pg, op);
 }
@@ -196,7 +204,7 @@ svuint16_t test_svextb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svextb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svextb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u32,_m,)(inactive, pg, op);
 }
@@ -213,7 +221,7 @@ svuint32_t test_svextb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svextb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svextb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u64,_m,)(inactive, pg, op);
 }
@@ -230,7 +238,7 @@ svuint64_t test_svextb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sxtb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svextb_s16_x(svbool_t pg, svint16_t op)
+svint16_t test_svextb_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s16,_x,)(pg, op);
 }
@@ -247,7 +255,7 @@ svint16_t test_svextb_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svextb_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svextb_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s32,_x,)(pg, op);
 }
@@ -264,7 +272,7 @@ svint32_t test_svextb_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svextb_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svextb_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_s64,_x,)(pg, op);
 }
@@ -281,7 +289,7 @@ svint64_t test_svextb_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uxtb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svextb_u16_x(svbool_t pg, svuint16_t op)
+svuint16_t test_svextb_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u16,_x,)(pg, op);
 }
@@ -298,7 +306,7 @@ svuint16_t test_svextb_u16_x(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svextb_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svextb_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u32,_x,)(pg, op);
 }
@@ -315,7 +323,7 @@ svuint32_t test_svextb_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svextb_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svextb_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextb,_u64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c
index 9063c284e036e..32f7db5afb79f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_exth.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svexth_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svexth_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_s32,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svint32_t test_svexth_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svexth_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svexth_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_s64,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svint64_t test_svexth_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svexth_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svexth_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_u32,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svuint32_t test_svexth_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svexth_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svexth_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_u64,_z,)(pg, op);
 }
@@ -94,7 +102,7 @@ svuint64_t test_svexth_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svexth_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svexth_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_s32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svint32_t test_svexth_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svexth_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svexth_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_s64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svint64_t test_svexth_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svexth_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svexth_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_u32,_m,)(inactive, pg, op);
 }
@@ -145,7 +153,7 @@ svuint32_t test_svexth_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svexth_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svexth_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_u64,_m,)(inactive, pg, op);
 }
@@ -162,7 +170,7 @@ svuint64_t test_svexth_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svexth_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svexth_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_s32,_x,)(pg, op);
 }
@@ -179,7 +187,7 @@ svint32_t test_svexth_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxth.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svexth_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svexth_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_s64,_x,)(pg, op);
 }
@@ -196,7 +204,7 @@ svint64_t test_svexth_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svexth_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svexth_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_u32,_x,)(pg, op);
 }
@@ -213,7 +221,7 @@ svuint32_t test_svexth_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxth.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svexth_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svexth_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svexth,_u64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c
index e4ec2e1a5556a..88474236a4bc7 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_extw.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svextw_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svextw_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextw,_s64,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svint64_t test_svextw_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svextw_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svextw_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextw,_u64,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svuint64_t test_svextw_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svextw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svextw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextw,_s64,_m,)(inactive, pg, op);
 }
@@ -77,7 +85,7 @@ svint64_t test_svextw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svextw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svextw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextw,_u64,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svuint64_t test_svextw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svextw_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svextw_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextw,_s64,_x,)(pg, op);
 }
@@ -111,7 +119,7 @@ svint64_t test_svextw_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svextw_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svextw_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svextw,_u64,_x,)(pg, op);
 }
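The same pattern illustrates what the patch enables for user code: with only +sme on the command line, SVE intrinsics that are valid in streaming mode can now be used from a streaming function. A hypothetical example (the function name is illustrative, not from the patch):

#include <arm_sve.h>

// Built with: -target-feature +sme (and no +sve). The __arm_streaming
// attribute puts the body in streaming mode, so the streaming-compatible
// SVE intrinsic below is accepted instead of being rejected for a
// missing 'sve' target feature.
svint64_t extend_low_words(svbool_t pg, svint64_t op) __arm_streaming
{
  // Sign-extend bits [31:0] of each active 64-bit element; inactive
  // elements are zeroed (the _z form).
  return svextw_s64_z(pg, op);
}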
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
index 20073f48b3874..eb66fbf055869 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c
index a9349f6f460c8..6f1b0e415bfec 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get2.c
@@ -5,7 +5,9 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
index da00eb5d243d8..c89f686c6bd2a 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c
index f536bad7cfb28..a7f74d29ed290 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get3.c
@@ -5,10 +5,12 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
index 86ee173141efa..79eed6912e3ce 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c
index cdb793ac74386..278c217a0def4 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_get4.c
@@ -5,7 +5,9 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_index.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_index.c
index 0a7158e0b92ca..688ede1fecf90 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_index.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_index.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svindex_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 [[BASE:%.*]], i8 [[STEP:%.*]])
@@ -15,7 +23,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 [[BASE:%.*]], i8 [[STEP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svindex_s8(int8_t base, int8_t step)
+svint8_t test_svindex_s8(int8_t base, int8_t step) MODE_ATTR
 {
   return svindex_s8(base, step);
 }
@@ -30,7 +38,7 @@ svint8_t test_svindex_s8(int8_t base, int8_t step)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 [[BASE:%.*]], i16 [[STEP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svindex_s16(int16_t base, int16_t step)
+svint16_t test_svindex_s16(int16_t base, int16_t step) MODE_ATTR
 {
   return svindex_s16(base, step);
 }
@@ -45,7 +53,7 @@ svint16_t test_svindex_s16(int16_t base, int16_t step)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 [[BASE:%.*]], i32 [[STEP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svindex_s32(int32_t base, int32_t step)
+svint32_t test_svindex_s32(int32_t base, int32_t step) MODE_ATTR
 {
   return svindex_s32(base, step);
 }
@@ -60,7 +68,7 @@ svint32_t test_svindex_s32(int32_t base, int32_t step)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 [[BASE:%.*]], i64 [[STEP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svindex_s64(int64_t base, int64_t step)
+svint64_t test_svindex_s64(int64_t base, int64_t step) MODE_ATTR
 {
   return svindex_s64(base, step);
 }
@@ -75,7 +83,7 @@ svint64_t test_svindex_s64(int64_t base, int64_t step)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 [[BASE:%.*]], i8 [[STEP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svindex_u8(uint8_t base, uint8_t step)
+svuint8_t test_svindex_u8(uint8_t base, uint8_t step) MODE_ATTR
 {
   return svindex_u8(base, step);
 }
@@ -90,7 +98,7 @@ svuint8_t test_svindex_u8(uint8_t base, uint8_t step)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.index.nxv8i16(i16 [[BASE:%.*]], i16 [[STEP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svindex_u16(uint16_t base, uint16_t step)
+svuint16_t test_svindex_u16(uint16_t base, uint16_t step) MODE_ATTR
 {
   return svindex_u16(base, step);
 }
@@ -105,7 +113,7 @@ svuint16_t test_svindex_u16(uint16_t base, uint16_t step)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.index.nxv4i32(i32 [[BASE:%.*]], i32 [[STEP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svindex_u32(uint32_t base, uint32_t step)
+svuint32_t test_svindex_u32(uint32_t base, uint32_t step) MODE_ATTR
 {
   return svindex_u32(base, step);
 }
@@ -120,7 +128,7 @@ svuint32_t test_svindex_u32(uint32_t base, uint32_t step)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 [[BASE:%.*]], i64 [[STEP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svindex_u64(uint64_t base, uint64_t step)
+svuint64_t test_svindex_u64(uint64_t base, uint64_t step) MODE_ATTR
 {
   return svindex_u64(base, step);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
index d7e59bf4c72d0..f65da9b6140de 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], bfloat [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svinsr_n_bf16(svbfloat16_t op1, bfloat16_t op2) {
+svbfloat16_t test_svinsr_n_bf16(svbfloat16_t op1, bfloat16_t op2) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svinsr_n_bf16'}}
   return SVE_ACLE_FUNC(svinsr, _n_bf16, , )(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c
index cf3409bbfafbc..fae3220a02f95 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], i8 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svinsr_n_s8(svint8_t op1, int8_t op2)
+svint8_t test_svinsr_n_s8(svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svinsr_n_s8(svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], i16 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svinsr_n_s16(svint16_t op1, int16_t op2)
+svint16_t test_svinsr_n_s16(svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svinsr_n_s16(svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svinsr_n_s32(svint32_t op1, int32_t op2)
+svint32_t test_svinsr_n_s32(svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svinsr_n_s32(svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svinsr_n_s64(svint64_t op1, int64_t op2)
+svint64_t test_svinsr_n_s64(svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svinsr_n_s64(svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], i8 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svinsr_n_u8(svuint8_t op1, uint8_t op2)
+svuint8_t test_svinsr_n_u8(svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svinsr_n_u8(svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], i16 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svinsr_n_u16(svuint16_t op1, uint16_t op2)
+svuint16_t test_svinsr_n_u16(svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svinsr_n_u16(svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svinsr_n_u32(svuint32_t op1, uint32_t op2)
+svuint32_t test_svinsr_n_u32(svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svinsr_n_u32(svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svinsr_n_u64(svuint64_t op1, uint64_t op2)
+svuint64_t test_svinsr_n_u64(svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_u64,,)(op1, op2);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svinsr_n_u64(svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], half [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svinsr_n_f16(svfloat16_t op1, float16_t op2)
+svfloat16_t test_svinsr_n_f16(svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_f16,,)(op1, op2);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svinsr_n_f16(svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], float [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svinsr_n_f32(svfloat32_t op1, float32_t op2)
+svfloat32_t test_svinsr_n_f32(svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_f32,,)(op1, op2);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svinsr_n_f32(svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], double [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svinsr_n_f64(svfloat64_t op1, float64_t op2)
+svfloat64_t test_svinsr_n_f64(svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svinsr,_n_f64,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c
index 6d74362e15301..3ef2a3fc9397c 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret bfloat [[TMP1]]
 //
-bfloat16_t test_svlasta_bf16(svbool_t pg, svbfloat16_t op) {
+bfloat16_t test_svlasta_bf16(svbool_t pg, svbfloat16_t op) MODE_ATTR {
   // expected-warning@+1 {{implicit declaration of function 'svlasta_bf16'}}
   return SVE_ACLE_FUNC(svlasta, _bf16, , )(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta.c
index ba87f343db473..5db5138981dce 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_svlasta_s8(svbool_t pg, svint8_t op)
+int8_t test_svlasta_s8(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_s8,,)(pg, op);
 }
@@ -41,7 +49,7 @@ int8_t test_svlasta_s8(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-int16_t test_svlasta_s16(svbool_t pg, svint16_t op)
+int16_t test_svlasta_s16(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_s16,,)(pg, op);
 }
@@ -58,7 +66,7 @@ int16_t test_svlasta_s16(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svlasta_s32(svbool_t pg, svint32_t op)
+int32_t test_svlasta_s32(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_s32,,)(pg, op);
 }
@@ -75,7 +83,7 @@ int32_t test_svlasta_s32(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svlasta_s64(svbool_t pg, svint64_t op)
+int64_t test_svlasta_s64(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_s64,,)(pg, op);
 }
@@ -90,7 +98,7 @@ int64_t test_svlasta_s64(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_svlasta_u8(svbool_t pg, svuint8_t op)
+uint8_t test_svlasta_u8(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_u8,,)(pg, op);
 }
@@ -107,7 +115,7 @@ uint8_t test_svlasta_u8(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-uint16_t test_svlasta_u16(svbool_t pg, svuint16_t op)
+uint16_t test_svlasta_u16(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_u16,,)(pg, op);
 }
@@ -124,7 +132,7 @@ uint16_t test_svlasta_u16(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svlasta_u32(svbool_t pg, svuint32_t op)
+uint32_t test_svlasta_u32(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_u32,,)(pg, op);
 }
@@ -141,7 +149,7 @@ uint32_t test_svlasta_u32(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlasta_u64(svbool_t pg, svuint64_t op)
+uint64_t test_svlasta_u64(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_u64,,)(pg, op);
 }
@@ -158,7 +166,7 @@ uint64_t test_svlasta_u64(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svlasta_f16(svbool_t pg, svfloat16_t op)
+float16_t test_svlasta_f16(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_f16,,)(pg, op);
 }
@@ -175,7 +183,7 @@ float16_t test_svlasta_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svlasta_f32(svbool_t pg, svfloat32_t op)
+float32_t test_svlasta_f32(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_f32,,)(pg, op);
 }
@@ -192,7 +200,7 @@ float32_t test_svlasta_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svlasta_f64(svbool_t pg, svfloat64_t op)
+float64_t test_svlasta_f64(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlasta,_f64,,)(pg, op);
 }
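
The hunks above all apply the same two-step change: add a +sme RUN line, and give each test function a MODE_ATTR suffix that expands to __arm_streaming when __ARM_FEATURE_SME is defined, so the bodies are checked as streaming functions. Below is a minimal sketch of the same pattern in user code; the helper name is hypothetical, but the intrinsics are the ones exercised above and are among those this patch makes acceptable in both modes:

  #include <arm_sve.h>

  #if defined __ARM_FEATURE_SME
  #define MODE_ATTR __arm_streaming
  #else
  #define MODE_ATTR
  #endif

  // Builds with +sve (MODE_ATTR empty) and with +sme,+nosve (the function
  // becomes a streaming function), matching the two RUN lines in the tests.
  static float after_last_active(svbool_t pg, const float *base) MODE_ATTR {
    svfloat32_t v = svld1_f32(pg, base);
    return svlasta_f32(pg, v); // element after the last active lane
  }
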
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c
index 3532f628593a3..d2caab2bd5dfe 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret bfloat [[TMP1]]
 //
-bfloat16_t test_svlastb_bf16(svbool_t pg, svbfloat16_t op) {
+bfloat16_t test_svlastb_bf16(svbool_t pg, svbfloat16_t op) MODE_ATTR {
  // expected-warning@+1 {{implicit declaration of function 'svlastb_bf16'}}
   return SVE_ACLE_FUNC(svlastb, _bf16, , )(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb.c
index 669c07511f639..ce87ff77c2c83 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_svlastb_s8(svbool_t pg, svint8_t op)
+int8_t test_svlastb_s8(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_s8,,)(pg, op);
 }
@@ -41,7 +49,7 @@ int8_t test_svlastb_s8(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-int16_t test_svlastb_s16(svbool_t pg, svint16_t op)
+int16_t test_svlastb_s16(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_s16,,)(pg, op);
 }
@@ -58,7 +66,7 @@ int16_t test_svlastb_s16(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svlastb_s32(svbool_t pg, svint32_t op)
+int32_t test_svlastb_s32(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_s32,,)(pg, op);
 }
@@ -75,7 +83,7 @@ int32_t test_svlastb_s32(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svlastb_s64(svbool_t pg, svint64_t op)
+int64_t test_svlastb_s64(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_s64,,)(pg, op);
 }
@@ -90,7 +98,7 @@ int64_t test_svlastb_s64(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_svlastb_u8(svbool_t pg, svuint8_t op)
+uint8_t test_svlastb_u8(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_u8,,)(pg, op);
 }
@@ -107,7 +115,7 @@ uint8_t test_svlastb_u8(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-uint16_t test_svlastb_u16(svbool_t pg, svuint16_t op)
+uint16_t test_svlastb_u16(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_u16,,)(pg, op);
 }
@@ -124,7 +132,7 @@ uint16_t test_svlastb_u16(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svlastb_u32(svbool_t pg, svuint32_t op)
+uint32_t test_svlastb_u32(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_u32,,)(pg, op);
 }
@@ -141,7 +149,7 @@ uint32_t test_svlastb_u32(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlastb_u64(svbool_t pg, svuint64_t op)
+uint64_t test_svlastb_u64(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_u64,,)(pg, op);
 }
@@ -158,7 +166,7 @@ uint64_t test_svlastb_u64(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svlastb_f16(svbool_t pg, svfloat16_t op)
+float16_t test_svlastb_f16(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_f16,,)(pg, op);
 }
@@ -175,7 +183,7 @@ float16_t test_svlastb_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svlastb_f32(svbool_t pg, svfloat32_t op)
+float32_t test_svlastb_f32(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_f32,,)(pg, op);
 }
@@ -192,7 +200,7 @@ float32_t test_svlastb_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svlastb_f64(svbool_t pg, svfloat64_t op)
+float64_t test_svlastb_f64(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlastb,_f64,,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c
index 38d88b483968d..cbc645d429e5c 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -27,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base)
+svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_bf16,,)(pg, base);
 }
@@ -52,7 +59,7 @@ svbfloat16_t test_svld1_bf16(svbool_t pg, const bfloat16_t *base)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x bfloat> @llvm.masked.load.nxv8bf16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP4]]
 //
-svbfloat16_t test_svld1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
+svbfloat16_t test_svld1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_bf16,,)(pg, base, vnum);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
index d355ea1928027..0c5ab6c9aea9f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svld1_s8(svbool_t pg, const int8_t *base)
+svint8_t test_svld1_s8(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_s8,,)(pg, base);
 }
@@ -41,7 +49,7 @@ svint8_t test_svld1_s8(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svld1_s16(svbool_t pg, const int16_t *base)
+svint16_t test_svld1_s16(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_s16,,)(pg, base);
 }
@@ -58,7 +66,7 @@ svint16_t test_svld1_s16(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svld1_s32(svbool_t pg, const int32_t *base)
+svint32_t test_svld1_s32(svbool_t pg, const int32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_s32,,)(pg, base);
 }
@@ -75,7 +83,7 @@ svint32_t test_svld1_s32(svbool_t pg, const int32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svld1_s64(svbool_t pg, const int64_t *base)
+svint64_t test_svld1_s64(svbool_t pg, const int64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_s64,,)(pg, base);
 }
@@ -90,7 +98,7 @@ svint64_t test_svld1_s64(svbool_t pg, const int64_t *base)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base)
+svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_u8,,)(pg, base);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svld1_u8(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base)
+svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_u16,,)(pg, base);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svld1_u16(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base)
+svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_u32,,)(pg, base);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svld1_u32(svbool_t pg, const uint32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base)
+svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_u64,,)(pg, base);
 }
@@ -158,7 +166,7 @@ svuint64_t test_svld1_u64(svbool_t pg, const uint64_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base)
+svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_f16,,)(pg, base);
 }
@@ -175,7 +183,7 @@ svfloat16_t test_svld1_f16(svbool_t pg, const float16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base)
+svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_f32,,)(pg, base);
 }
@@ -192,7 +200,7 @@ svfloat32_t test_svld1_f32(svbool_t pg, const float32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
+svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1,_f64,,)(pg, base);
 }
@@ -215,7 +223,7 @@ svfloat64_t test_svld1_f64(svbool_t pg, const float64_t *base)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP3]]
 //
-svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_s8,,)(pg, base, vnum);
 }
@@ -240,7 +248,7 @@ svint8_t test_svld1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
 //
-svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_s16,,)(pg, base, vnum);
 }
@@ -265,7 +273,7 @@ svint16_t test_svld1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
 //
-svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_s32,,)(pg, base, vnum);
 }
@@ -290,7 +298,7 @@ svint32_t test_svld1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
 //
-svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_s64,,)(pg, base, vnum);
 }
@@ -313,7 +321,7 @@ svint64_t test_svld1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP3]]
 //
-svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_u8,,)(pg, base, vnum);
 }
@@ -338,7 +346,7 @@ svuint8_t test_svld1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP4]]
 //
-svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_u16,,)(pg, base, vnum);
 }
@@ -363,7 +371,7 @@ svuint16_t test_svld1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP4]]
 //
-svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_u32,,)(pg, base, vnum);
 }
@@ -388,7 +396,7 @@ svuint32_t test_svld1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP4]]
 //
-svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_u64,,)(pg, base, vnum);
 }
@@ -413,7 +421,7 @@ svuint64_t test_svld1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 8 x half> @llvm.masked.load.nxv8f16.p0(ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP4]]
 //
-svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_f16,,)(pg, base, vnum);
 }
@@ -438,7 +446,7 @@ svfloat16_t test_svld1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP4]]
 //
-svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_f32,,)(pg, base, vnum);
 }
@@ -463,11 +471,13 @@ svfloat32_t test_svld1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> zeroinitializer)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP4]]
 //
-svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+svfloat64_t test_svld1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1_vnum,_f64,,)(pg, base, vnum);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svld1_gather_u32base_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -1152,3 +1162,4 @@ svfloat64_t test_svld1_gather_u64base_index_f64(svbool_t pg, svuint64_t bases, i
   return SVE_ACLE_FUNC(svld1_gather, _u64base, _index_f64, )(pg, bases, index);
 }
 
+#endif
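
The gather tests above, by contrast, are now wrapped in #ifndef __ARM_FEATURE_SME: gather loads have no streaming-mode form, so they are not exposed with +sme and must be excluded from the streaming build. A hypothetical wrapper showing the same compile-time split in user code (the function name is invented; the intrinsic is the non-overloaded ACLE gather form):

  #include <arm_sve.h>

  #ifndef __ARM_FEATURE_SME
  // Only available in the non-streaming (+sve) build, mirroring the guard
  // the test places around its gather cases.
  static svint32_t load_indexed(svbool_t pg, const int32_t *base,
                                svuint32_t offsets) {
    return svld1_gather_u32offset_s32(pg, base, offsets);
  }
  #endif
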
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq-bfloat.c
index c015614056e7e..d50b0269e5297 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -27,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svld1rq_bf16(svbool_t pg, const bfloat16_t *base)
+svbfloat16_t test_svld1rq_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_bf16,,)(pg, base);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c
index 66528ae771af4..9784e1b6b8001 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1rq.c
@@ -5,9 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svld1rq_s8(svbool_t pg, const int8_t *base)
+svint8_t test_svld1rq_s8(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_s8,,)(pg, base);
 }
@@ -42,7 +50,7 @@ svint8_t test_svld1rq_s8(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svld1rq_s16(svbool_t pg, const int16_t *base)
+svint16_t test_svld1rq_s16(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_s16,,)(pg, base);
 }
@@ -59,7 +67,7 @@ svint16_t test_svld1rq_s16(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svld1rq_s32(svbool_t pg, const int32_t *base)
+svint32_t test_svld1rq_s32(svbool_t pg, const int32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_s32,,)(pg, base);
 }
@@ -76,7 +84,7 @@ svint32_t test_svld1rq_s32(svbool_t pg, const int32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svld1rq_s64(svbool_t pg, const int64_t *base)
+svint64_t test_svld1rq_s64(svbool_t pg, const int64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_s64,,)(pg, base);
 }
@@ -91,7 +99,7 @@ svint64_t test_svld1rq_s64(svbool_t pg, const int64_t *base)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ld1rq.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svld1rq_u8(svbool_t pg, const uint8_t *base)
+svuint8_t test_svld1rq_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_u8,,)(pg, base);
 }
@@ -108,7 +116,7 @@ svuint8_t test_svld1rq_u8(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1rq.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svld1rq_u16(svbool_t pg, const uint16_t *base)
+svuint16_t test_svld1rq_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_u16,,)(pg, base);
 }
@@ -125,7 +133,7 @@ svuint16_t test_svld1rq_u16(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1rq.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svld1rq_u32(svbool_t pg, const uint32_t *base)
+svuint32_t test_svld1rq_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_u32,,)(pg, base);
 }
@@ -142,7 +150,7 @@ svuint32_t test_svld1rq_u32(svbool_t pg, const uint32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1rq.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svld1rq_u64(svbool_t pg, const uint64_t *base)
+svuint64_t test_svld1rq_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_u64,,)(pg, base);
 }
@@ -159,7 +167,7 @@ svuint64_t test_svld1rq_u64(svbool_t pg, const uint64_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1rq.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svld1rq_f16(svbool_t pg, const float16_t *base)
+svfloat16_t test_svld1rq_f16(svbool_t pg, const float16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_f16,,)(pg, base);
 }
@@ -176,7 +184,7 @@ svfloat16_t test_svld1rq_f16(svbool_t pg, const float16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1rq.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svld1rq_f32(svbool_t pg, const float32_t *base)
+svfloat32_t test_svld1rq_f32(svbool_t pg, const float32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_f32,,)(pg, base);
 }
@@ -193,7 +201,7 @@ svfloat32_t test_svld1rq_f32(svbool_t pg, const float32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1rq.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svld1rq_f64(svbool_t pg, const float64_t *base)
+svfloat64_t test_svld1rq_f64(svbool_t pg, const float64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld1rq,_f64,,)(pg, base);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
index 560a18146b089..59d1e103db389 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svld1sb_s16(svbool_t pg, const int8_t *base)
+svint16_t test_svld1sb_s16(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return svld1sb_s16(pg, base);
 }
@@ -47,7 +55,7 @@ svint16_t test_svld1sb_s16(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svld1sb_s32(svbool_t pg, const int8_t *base)
+svint32_t test_svld1sb_s32(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return svld1sb_s32(pg, base);
 }
@@ -66,7 +74,7 @@ svint32_t test_svld1sb_s32(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svld1sb_s64(svbool_t pg, const int8_t *base)
+svint64_t test_svld1sb_s64(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return svld1sb_s64(pg, base);
 }
@@ -85,7 +93,7 @@ svint64_t test_svld1sb_s64(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svld1sb_u16(svbool_t pg, const int8_t *base)
+svuint16_t test_svld1sb_u16(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return svld1sb_u16(pg, base);
 }
@@ -104,7 +112,7 @@ svuint16_t test_svld1sb_u16(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svld1sb_u32(svbool_t pg, const int8_t *base)
+svuint32_t test_svld1sb_u32(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return svld1sb_u32(pg, base);
 }
@@ -123,7 +131,7 @@ svuint32_t test_svld1sb_u32(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base)
+svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return svld1sb_u64(pg, base);
 }
@@ -150,7 +158,7 @@ svuint64_t test_svld1sb_u64(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
 //
-svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
+svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sb_vnum_s16(pg, base, vnum);
 }
@@ -177,7 +185,7 @@ svint16_t test_svld1sb_vnum_s16(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
+svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sb_vnum_s32(pg, base, vnum);
 }
@@ -204,7 +212,7 @@ svint32_t test_svld1sb_vnum_s32(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
+svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sb_vnum_s64(pg, base, vnum);
 }
@@ -231,7 +239,7 @@ svint64_t test_svld1sb_vnum_s64(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
 //
-svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
+svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sb_vnum_u16(pg, base, vnum);
 }
@@ -258,7 +266,7 @@ svuint16_t test_svld1sb_vnum_u16(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
+svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sb_vnum_u32(pg, base, vnum);
 }
@@ -285,11 +293,13 @@ svuint32_t test_svld1sb_vnum_u32(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svuint64_t test_svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum)
+svuint64_t test_svld1sb_vnum_u64(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sb_vnum_u64(pg, base, vnum);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svld1sb_gather_u32base_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -577,3 +587,5 @@ svuint32_t test_svld1sb_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases,
 svuint64_t test_svld1sb_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
   return SVE_ACLE_FUNC(svld1sb_gather, _u64base, _offset_u64, )(pg, bases, offset);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
index b2f1646357696..1df3f6adbc1c6 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sh.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svld1sh_s32(svbool_t pg, const int16_t *base)
+svint32_t test_svld1sh_s32(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return svld1sh_s32(pg, base);
 }
@@ -47,7 +55,7 @@ svint32_t test_svld1sh_s32(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svld1sh_s64(svbool_t pg, const int16_t *base)
+svint64_t test_svld1sh_s64(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return svld1sh_s64(pg, base);
 }
@@ -66,7 +74,7 @@ svint64_t test_svld1sh_s64(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svld1sh_u32(svbool_t pg, const int16_t *base)
+svuint32_t test_svld1sh_u32(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return svld1sh_u32(pg, base);
 }
@@ -85,7 +93,7 @@ svuint32_t test_svld1sh_u32(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base)
+svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return svld1sh_u64(pg, base);
 }
@@ -112,7 +120,7 @@ svuint64_t test_svld1sh_u64(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
+svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sh_vnum_s32(pg, base, vnum);
 }
@@ -139,7 +147,7 @@ svint32_t test_svld1sh_vnum_s32(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
+svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sh_vnum_s64(pg, base, vnum);
 }
@@ -166,7 +174,7 @@ svint64_t test_svld1sh_vnum_s64(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
+svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sh_vnum_u32(pg, base, vnum);
 }
@@ -193,11 +201,13 @@ svuint32_t test_svld1sh_vnum_u32(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum)
+svuint64_t test_svld1sh_vnum_u64(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sh_vnum_u64(pg, base, vnum);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svld1sh_gather_u32base_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -709,3 +719,5 @@ svuint32_t test_svld1sh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases,
 svuint64_t test_svld1sh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
   return SVE_ACLE_FUNC(svld1sh_gather, _u64base, _index_u64, )(pg, bases, index);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
index 42cc5bf83a4a4..e7d77e62d44c1 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1sw.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svld1sw_s64(svbool_t pg, const int32_t *base)
+svint64_t test_svld1sw_s64(svbool_t pg, const int32_t *base) MODE_ATTR
 {
   return svld1sw_s64(pg, base);
 }
@@ -47,7 +55,7 @@ svint64_t test_svld1sw_s64(svbool_t pg, const int32_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = sext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base)
+svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base) MODE_ATTR
 {
   return svld1sw_u64(pg, base);
 }
@@ -74,7 +82,7 @@ svuint64_t test_svld1sw_u64(svbool_t pg, const int32_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
+svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sw_vnum_s64(pg, base, vnum);
 }
@@ -101,11 +109,13 @@ svint64_t test_svld1sw_vnum_s64(svbool_t pg, const int32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = sext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svuint64_t test_svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum)
+svuint64_t test_svld1sw_vnum_u64(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1sw_vnum_u64(pg, base, vnum);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svld1sw_gather_u64base_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -361,3 +371,5 @@ svint64_t test_svld1sw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
 svuint64_t test_svld1sw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
   return SVE_ACLE_FUNC(svld1sw_gather, _u64base, _index_u64, )(pg, bases, index);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
index 50f81d1614af1..31906b4e5f646 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ub.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svld1ub_s16(svbool_t pg, const uint8_t *base)
+svint16_t test_svld1ub_s16(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return svld1ub_s16(pg, base);
 }
@@ -47,7 +55,7 @@ svint16_t test_svld1ub_s16(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svld1ub_s32(svbool_t pg, const uint8_t *base)
+svint32_t test_svld1ub_s32(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return svld1ub_s32(pg, base);
 }
@@ -66,7 +74,7 @@ svint32_t test_svld1ub_s32(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svld1ub_s64(svbool_t pg, const uint8_t *base)
+svint64_t test_svld1ub_s64(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return svld1ub_s64(pg, base);
 }
@@ -85,7 +93,7 @@ svint64_t test_svld1ub_s64(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 8 x i8> [[TMP1]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svld1ub_u16(svbool_t pg, const uint8_t *base)
+svuint16_t test_svld1ub_u16(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return svld1ub_u16(pg, base);
 }
@@ -104,7 +112,7 @@ svuint16_t test_svld1ub_u16(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i8> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svld1ub_u32(svbool_t pg, const uint8_t *base)
+svuint32_t test_svld1ub_u32(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return svld1ub_u32(pg, base);
 }
@@ -123,7 +131,7 @@ svuint32_t test_svld1ub_u32(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i8> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svld1ub_u64(svbool_t pg, const uint8_t *base)
+svuint64_t test_svld1ub_u64(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return svld1ub_u64(pg, base);
 }
@@ -150,7 +158,7 @@ svuint64_t test_svld1ub_u64(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
 //
-svint16_t test_svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
+svint16_t test_svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1ub_vnum_s16(pg, base, vnum);
 }
@@ -177,7 +185,7 @@ svint16_t test_svld1ub_vnum_s16(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svint32_t test_svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
+svint32_t test_svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1ub_vnum_s32(pg, base, vnum);
 }
@@ -204,7 +212,7 @@ svint32_t test_svld1ub_vnum_s32(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svint64_t test_svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
+svint64_t test_svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1ub_vnum_s64(pg, base, vnum);
 }
@@ -231,7 +239,7 @@ svint64_t test_svld1ub_vnum_s64(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 8 x i8> [[TMP4]] to <vscale x 8 x i16>
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
 //
-svuint16_t test_svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
+svuint16_t test_svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1ub_vnum_u16(pg, base, vnum);
 }
@@ -258,7 +266,7 @@ svuint16_t test_svld1ub_vnum_u16(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i8> [[TMP4]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svuint32_t test_svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
+svuint32_t test_svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1ub_vnum_u32(pg, base, vnum);
 }
@@ -285,11 +293,13 @@ svuint32_t test_svld1ub_vnum_u32(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i8> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svuint64_t test_svld1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum)
+svuint64_t test_svld1ub_vnum_u64(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1ub_vnum_u64(pg, base, vnum);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svld1ub_gather_u32base_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -577,3 +587,5 @@ svuint32_t test_svld1ub_gather_u32base_offset_u32(svbool_t pg, svuint32_t bases,
 svuint64_t test_svld1ub_gather_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64_t offset) {
   return SVE_ACLE_FUNC(svld1ub_gather, _u64base, _offset_u64, )(pg, bases, offset);
 }
+
+#endif
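As an illustration of what the relaxed guards enable (a sketch only, not
part of the patch; the function name is hypothetical), code built with
+sme and without +sve can now use the contiguous extending loads from a
streaming function, while the gather forms stay behind the
__ARM_FEATURE_SME guard:

  #include <arm_sve.h>

  // Sketch: contiguous extending load, widening unsigned bytes into
  // 16-bit lanes. Valid in streaming mode once svld1ub carries
  // VerifyRuntimeMode.
  svuint16_t widen_bytes(svbool_t pg, const uint8_t *base) __arm_streaming {
    return svld1ub_u16(pg, base);
    // The svld1ub_gather_* forms would still require +sve and a
    // non-streaming caller.
  }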
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
index 2efccced81f9c..e6553e193109f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uh.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svld1uh_s32(svbool_t pg, const uint16_t *base)
+svint32_t test_svld1uh_s32(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return svld1uh_s32(pg, base);
 }
@@ -47,7 +55,7 @@ svint32_t test_svld1uh_s32(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svld1uh_s64(svbool_t pg, const uint16_t *base)
+svint64_t test_svld1uh_s64(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return svld1uh_s64(pg, base);
 }
@@ -66,7 +74,7 @@ svint64_t test_svld1uh_s64(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 4 x i16> [[TMP1]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svld1uh_u32(svbool_t pg, const uint16_t *base)
+svuint32_t test_svld1uh_u32(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return svld1uh_u32(pg, base);
 }
@@ -85,7 +93,7 @@ svuint32_t test_svld1uh_u32(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i16> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svld1uh_u64(svbool_t pg, const uint16_t *base)
+svuint64_t test_svld1uh_u64(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return svld1uh_u64(pg, base);
 }
@@ -112,7 +120,7 @@ svuint64_t test_svld1uh_u64(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svint32_t test_svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
+svint32_t test_svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1uh_vnum_s32(pg, base, vnum);
 }
@@ -139,7 +147,7 @@ svint32_t test_svld1uh_vnum_s32(svbool_t pg, const uint16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svint64_t test_svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
+svint64_t test_svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1uh_vnum_s64(pg, base, vnum);
 }
@@ -166,7 +174,7 @@ svint64_t test_svld1uh_vnum_s64(svbool_t pg, const uint16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 4 x i16> [[TMP4]] to <vscale x 4 x i32>
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP5]]
 //
-svuint32_t test_svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum)
+svuint32_t test_svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1uh_vnum_u32(pg, base, vnum);
 }
@@ -193,11 +201,13 @@ svuint32_t test_svld1uh_vnum_u32(svbool_t pg, const uint16_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i16> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svuint64_t test_svld1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum)
+svuint64_t test_svld1uh_vnum_u64(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1uh_vnum_u64(pg, base, vnum);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svld1uh_gather_u32base_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -709,3 +719,5 @@ svuint32_t test_svld1uh_gather_u32base_index_u32(svbool_t pg, svuint32_t bases,
 svuint64_t test_svld1uh_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
   return SVE_ACLE_FUNC(svld1uh_gather, _u64base, _index_u64, )(pg, bases, index);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
index ee5a415650562..b7ffb86daac23 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1uw.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svld1uw_s64(svbool_t pg, const uint32_t *base)
+svint64_t test_svld1uw_s64(svbool_t pg, const uint32_t *base) MODE_ATTR
 {
   return svld1uw_s64(pg, base);
 }
@@ -47,7 +55,7 @@ svint64_t test_svld1uw_s64(svbool_t pg, const uint32_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = zext <vscale x 2 x i32> [[TMP1]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svld1uw_u64(svbool_t pg, const uint32_t *base)
+svuint64_t test_svld1uw_u64(svbool_t pg, const uint32_t *base) MODE_ATTR
 {
   return svld1uw_u64(pg, base);
 }
@@ -74,7 +82,7 @@ svuint64_t test_svld1uw_u64(svbool_t pg, const uint32_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svint64_t test_svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
+svint64_t test_svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1uw_vnum_s64(pg, base, vnum);
 }
@@ -101,11 +109,13 @@ svint64_t test_svld1uw_vnum_s64(svbool_t pg, const uint32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = zext <vscale x 2 x i32> [[TMP4]] to <vscale x 2 x i64>
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP5]]
 //
-svuint64_t test_svld1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum)
+svuint64_t test_svld1uw_vnum_u64(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR
 {
   return svld1uw_vnum_u64(pg, base, vnum);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svld1uw_gather_u64base_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -361,3 +371,5 @@ svint64_t test_svld1uw_gather_u64base_index_s64(svbool_t pg, svuint64_t bases, i
 svuint64_t test_svld1uw_gather_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_t index) {
   return SVE_ACLE_FUNC(svld1uw_gather, _u64base, _index_u64, )(pg, bases, index);
 }
+
+#endif
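The added +sme RUN line in each of these files corresponds to an
invocation along these lines (cc1 is Clang's internal interface; the
file name stands in for %s):

  clang -cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone \
      -Werror -o /dev/null acle_sve_ld1uw.c

With only +sme, the test bodies compile because every test function is
now marked __arm_streaming via MODE_ATTR, while the gather tests are
skipped by the #ifndef __ARM_FEATURE_SME guard.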
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
index 0a7649a23f9a8..fcae89c50b0e6 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -35,7 +42,7 @@
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP3]], <vscale x 8 x bfloat> [[TMP4]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP5]]
 //
-svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base)
+svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_bf16,,)(pg, base);
 }
@@ -63,7 +70,7 @@ svbfloat16x2_t test_svld2_bf16(svbool_t pg, const bfloat16_t *base)
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x bfloat> @llvm.vector.insert.nxv16bf16.nxv8bf16(<vscale x 16 x bfloat> [[TMP4]], <vscale x 8 x bfloat> [[TMP5]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> [[TMP6]]
 //
-svbfloat16x2_t test_svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
+svbfloat16x2_t test_svld2_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_bf16,,)(pg, base, vnum);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c
index 50bbc144be4fa..992b51fa25123 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld2.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -33,7 +41,7 @@
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
 //
-svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base)
+svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_s8,,)(pg, base);
 }
@@ -58,7 +66,7 @@ svint8x2_t test_svld2_s8(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
 //
-svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base)
+svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_s16,,)(pg, base);
 }
@@ -83,7 +91,7 @@ svint16x2_t test_svld2_s16(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
 //
-svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base)
+svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_s32,,)(pg, base);
 }
@@ -108,7 +116,7 @@ svint32x2_t test_svld2_s32(svbool_t pg, const int32_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
 //
-svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base)
+svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_s64,,)(pg, base);
 }
@@ -131,7 +139,7 @@ svint64x2_t test_svld2_s64(svbool_t pg, const int64_t *base)
 // CPP-CHECK-NEXT:    [[TMP4:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP4]]
 //
-svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base)
+svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_u8,,)(pg, base);
 }
@@ -156,7 +164,7 @@ svuint8x2_t test_svld2_u8(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP3]], <vscale x 8 x i16> [[TMP4]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP5]]
 //
-svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base)
+svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_u16,,)(pg, base);
 }
@@ -181,7 +189,7 @@ svuint16x2_t test_svld2_u16(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP3]], <vscale x 4 x i32> [[TMP4]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP5]]
 //
-svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base)
+svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_u32,,)(pg, base);
 }
@@ -206,7 +214,7 @@ svuint32x2_t test_svld2_u32(svbool_t pg, const uint32_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP3]], <vscale x 2 x i64> [[TMP4]], i64 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP5]]
 //
-svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base)
+svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_u64,,)(pg, base);
 }
@@ -231,7 +239,7 @@ svuint64x2_t test_svld2_u64(svbool_t pg, const uint64_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP3]], <vscale x 8 x half> [[TMP4]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP5]]
 //
-svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base)
+svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_f16,,)(pg, base);
 }
@@ -256,7 +264,7 @@ svfloat16x2_t test_svld2_f16(svbool_t pg, const float16_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP3]], <vscale x 4 x float> [[TMP4]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP5]]
 //
-svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base)
+svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_f32,,)(pg, base);
 }
@@ -281,7 +289,7 @@ svfloat32x2_t test_svld2_f32(svbool_t pg, const float32_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP4]], i64 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP5]]
 //
-svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base)
+svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2,_f64,,)(pg, base);
 }
@@ -306,7 +314,7 @@ svfloat64x2_t test_svld2_f64(svbool_t pg, const float64_t *base)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
 //
-svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_s8,,)(pg, base, vnum);
 }
@@ -333,7 +341,7 @@ svint8x2_t test_svld2_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
 //
-svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_s16,,)(pg, base, vnum);
 }
@@ -360,7 +368,7 @@ svint16x2_t test_svld2_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
 //
-svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_s32,,)(pg, base, vnum);
 }
@@ -387,7 +395,7 @@ svint32x2_t test_svld2_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP6]]
 //
-svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_s64,,)(pg, base, vnum);
 }
@@ -412,7 +420,7 @@ svint64x2_t test_svld2_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP5:%.*]] = tail call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i8> [[TMP5]]
 //
-svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_u8,,)(pg, base, vnum);
 }
@@ -439,7 +447,7 @@ svuint8x2_t test_svld2_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x i16> @llvm.vector.insert.nxv16i16.nxv8i16(<vscale x 16 x i16> [[TMP4]], <vscale x 8 x i16> [[TMP5]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i16> [[TMP6]]
 //
-svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_u16,,)(pg, base, vnum);
 }
@@ -466,7 +474,7 @@ svuint16x2_t test_svld2_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i32> [[TMP6]]
 //
-svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_u32,,)(pg, base, vnum);
 }
@@ -493,7 +501,7 @@ svuint32x2_t test_svld2_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> [[TMP4]], <vscale x 2 x i64> [[TMP5]], i64 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i64> [[TMP6]]
 //
-svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_u64,,)(pg, base, vnum);
 }
@@ -520,7 +528,7 @@ svuint64x2_t test_svld2_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 16 x half> @llvm.vector.insert.nxv16f16.nxv8f16(<vscale x 16 x half> [[TMP4]], <vscale x 8 x half> [[TMP5]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x half> [[TMP6]]
 //
-svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_f16,,)(pg, base, vnum);
 }
@@ -547,7 +555,7 @@ svfloat16x2_t test_svld2_vnum_f16(svbool_t pg, const float16_t *base, int64_t vn
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> [[TMP4]], <vscale x 4 x float> [[TMP5]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x float> [[TMP6]]
 //
-svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_f32,,)(pg, base, vnum);
 }
@@ -574,7 +582,7 @@ svfloat32x2_t test_svld2_vnum_f32(svbool_t pg, const float32_t *base, int64_t vn
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP4]], <vscale x 2 x double> [[TMP5]], i64 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x double> [[TMP6]]
 //
-svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+svfloat64x2_t test_svld2_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld2_vnum,_f64,,)(pg, base, vnum);
 }
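The structure loads return vector tuples, so a hedged sketch of
streaming-mode usage looks like this (function name hypothetical, not
taken from the patch):

  #include <arm_sve.h>

  // Sketch: de-interleaving two-vector load; svget2 extracts one part
  // of the tuple, and the two halves are then summed lane-wise.
  svint32_t sum_pairs(svbool_t pg, const int32_t *base) __arm_streaming {
    svint32x2_t v = svld2_s32(pg, base);
    return svadd_s32_x(pg, svget2_s32(v, 0), svget2_s32(v, 1));
  }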
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c
index ff04431fb87fd..9100d27534c1c 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -39,7 +47,7 @@
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP5]], <vscale x 8 x bfloat> [[TMP6]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 24 x bfloat> [[TMP7]]
 //
-svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base)
+svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_bf16,,)(pg, base);
 }
@@ -70,7 +78,7 @@ svbfloat16x3_t test_svld3_bf16(svbool_t pg, const bfloat16_t *base)
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x bfloat> @llvm.vector.insert.nxv24bf16.nxv8bf16(<vscale x 24 x bfloat> [[TMP6]], <vscale x 8 x bfloat> [[TMP7]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 24 x bfloat> [[TMP8]]
 //
-svbfloat16x3_t test_svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
+svbfloat16x3_t test_svld3_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_bf16,,)(pg, base, vnum);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c
index 753bf39d6561d..10206b5362e11 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld3.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -37,7 +44,7 @@
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
 // CPP-CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP6]]
 //
-svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base)
+svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_s8,,)(pg, base);
 }
@@ -66,7 +73,7 @@ svint8x3_t test_svld3_s8(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP7]]
 //
-svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base)
+svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_s16,,)(pg, base);
 }
@@ -95,7 +102,7 @@ svint16x3_t test_svld3_s16(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP7]]
 //
-svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base)
+svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_s32,,)(pg, base);
 }
@@ -124,7 +131,7 @@ svint32x3_t test_svld3_s32(svbool_t pg, const int32_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP7]]
 //
-svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base)
+svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_s64,,)(pg, base);
 }
@@ -151,7 +158,7 @@ svint64x3_t test_svld3_s64(svbool_t pg, const int64_t *base)
 // CPP-CHECK-NEXT:    [[TMP6:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP5]], i64 32)
 // CPP-CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP6]]
 //
-svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base)
+svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_u8,,)(pg, base);
 }
@@ -180,7 +187,7 @@ svuint8x3_t test_svld3_u8(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP5]], <vscale x 8 x i16> [[TMP6]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP7]]
 //
-svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base)
+svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_u16,,)(pg, base);
 }
@@ -209,7 +216,7 @@ svuint16x3_t test_svld3_u16(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP5]], <vscale x 4 x i32> [[TMP6]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP7]]
 //
-svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base)
+svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_u32,,)(pg, base);
 }
@@ -238,7 +245,7 @@ svuint32x3_t test_svld3_u32(svbool_t pg, const uint32_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP5]], <vscale x 2 x i64> [[TMP6]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP7]]
 //
-svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base)
+svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_u64,,)(pg, base);
 }
@@ -267,7 +274,7 @@ svuint64x3_t test_svld3_u64(svbool_t pg, const uint64_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP5]], <vscale x 8 x half> [[TMP6]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 24 x half> [[TMP7]]
 //
-svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base)
+svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_f16,,)(pg, base);
 }
@@ -296,7 +303,7 @@ svfloat16x3_t test_svld3_f16(svbool_t pg, const float16_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP5]], <vscale x 4 x float> [[TMP6]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 12 x float> [[TMP7]]
 //
-svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base)
+svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_f32,,)(pg, base);
 }
@@ -325,7 +332,7 @@ svfloat32x3_t test_svld3_f32(svbool_t pg, const float32_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP5]], <vscale x 2 x double> [[TMP6]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 6 x double> [[TMP7]]
 //
-svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base)
+svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3,_f64,,)(pg, base);
 }
@@ -354,7 +361,7 @@ svfloat64x3_t test_svld3_f64(svbool_t pg, const float64_t *base)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
 // CPP-CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP7]]
 //
-svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_s8,,)(pg, base, vnum);
 }
@@ -385,7 +392,7 @@ svint8x3_t test_svld3_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP8]]
 //
-svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_s16,,)(pg, base, vnum);
 }
@@ -416,7 +423,7 @@ svint16x3_t test_svld3_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP8]]
 //
-svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_s32,,)(pg, base, vnum);
 }
@@ -447,7 +454,7 @@ svint32x3_t test_svld3_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP8]]
 //
-svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_s64,,)(pg, base, vnum);
 }
@@ -476,7 +483,7 @@ svint64x3_t test_svld3_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP7:%.*]] = tail call <vscale x 48 x i8> @llvm.vector.insert.nxv48i8.nxv16i8(<vscale x 48 x i8> [[TMP5]], <vscale x 16 x i8> [[TMP6]], i64 32)
 // CPP-CHECK-NEXT:    ret <vscale x 48 x i8> [[TMP7]]
 //
-svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_u8,,)(pg, base, vnum);
 }
@@ -507,7 +514,7 @@ svuint8x3_t test_svld3_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> [[TMP6]], <vscale x 8 x i16> [[TMP7]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 24 x i16> [[TMP8]]
 //
-svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_u16,,)(pg, base, vnum);
 }
@@ -538,7 +545,7 @@ svuint16x3_t test_svld3_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x i32> @llvm.vector.insert.nxv12i32.nxv4i32(<vscale x 12 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 12 x i32> [[TMP8]]
 //
-svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_u32,,)(pg, base, vnum);
 }
@@ -569,7 +576,7 @@ svuint32x3_t test_svld3_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> [[TMP6]], <vscale x 2 x i64> [[TMP7]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 6 x i64> [[TMP8]]
 //
-svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_u64,,)(pg, base, vnum);
 }
@@ -600,7 +607,7 @@ svuint64x3_t test_svld3_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 24 x half> @llvm.vector.insert.nxv24f16.nxv8f16(<vscale x 24 x half> [[TMP6]], <vscale x 8 x half> [[TMP7]], i64 16)
 // CPP-CHECK-NEXT:    ret <vscale x 24 x half> [[TMP8]]
 //
-svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_f16,,)(pg, base, vnum);
 }
@@ -631,7 +638,7 @@ svfloat16x3_t test_svld3_vnum_f16(svbool_t pg, const float16_t *base, int64_t vn
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 12 x float> @llvm.vector.insert.nxv12f32.nxv4f32(<vscale x 12 x float> [[TMP6]], <vscale x 4 x float> [[TMP7]], i64 8)
 // CPP-CHECK-NEXT:    ret <vscale x 12 x float> [[TMP8]]
 //
-svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_f32,,)(pg, base, vnum);
 }
@@ -662,7 +669,7 @@ svfloat32x3_t test_svld3_vnum_f32(svbool_t pg, const float32_t *base, int64_t vn
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 6 x double> @llvm.vector.insert.nxv6f64.nxv2f64(<vscale x 6 x double> [[TMP6]], <vscale x 2 x double> [[TMP7]], i64 4)
 // CPP-CHECK-NEXT:    ret <vscale x 6 x double> [[TMP8]]
 //
-svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+svfloat64x3_t test_svld3_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld3_vnum,_f64,,)(pg, base, vnum);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c
index c6063872c63f6..0f21ffdb6f709 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -43,7 +50,7 @@
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP7]], <vscale x 8 x bfloat> [[TMP8]], i64 24)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP9]]
 //
-svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base)
+svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_bf16,,)(pg, base);
 }
@@ -78,7 +85,7 @@ svbfloat16x4_t test_svld4_bf16(svbool_t pg, const bfloat16_t *base)
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x bfloat> @llvm.vector.insert.nxv32bf16.nxv8bf16(<vscale x 32 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[TMP9]], i64 24)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x bfloat> [[TMP10]]
 //
-svbfloat16x4_t test_svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
+svbfloat16x4_t test_svld4_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_bf16,,)(pg, base, vnum);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c
index 6920813c44a26..06e0730788653 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld4.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -41,7 +48,7 @@
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
 // CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
 //
-svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base)
+svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_s8,,)(pg, base);
 }
@@ -74,7 +81,7 @@ svint8x4_t test_svld4_s8(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
 //
-svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base)
+svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_s16,,)(pg, base);
 }
@@ -107,7 +114,7 @@ svint16x4_t test_svld4_s16(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
 //
-svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base)
+svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_s32,,)(pg, base);
 }
@@ -140,7 +147,7 @@ svint32x4_t test_svld4_s32(svbool_t pg, const int32_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
 //
-svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base)
+svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_s64,,)(pg, base);
 }
@@ -171,7 +178,7 @@ svint64x4_t test_svld4_s64(svbool_t pg, const int64_t *base)
 // CPP-CHECK-NEXT:    [[TMP8:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP6]], <vscale x 16 x i8> [[TMP7]], i64 48)
 // CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP8]]
 //
-svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base)
+svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_u8,,)(pg, base);
 }
@@ -204,7 +211,7 @@ svuint8x4_t test_svld4_u8(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP7]], <vscale x 8 x i16> [[TMP8]], i64 24)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP9]]
 //
-svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base)
+svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_u16,,)(pg, base);
 }
@@ -237,7 +244,7 @@ svuint16x4_t test_svld4_u16(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP7]], <vscale x 4 x i32> [[TMP8]], i64 12)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP9]]
 //
-svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base)
+svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_u32,,)(pg, base);
 }
@@ -270,7 +277,7 @@ svuint32x4_t test_svld4_u32(svbool_t pg, const uint32_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP7]], <vscale x 2 x i64> [[TMP8]], i64 6)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP9]]
 //
-svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base)
+svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_u64,,)(pg, base);
 }
@@ -303,7 +310,7 @@ svuint64x4_t test_svld4_u64(svbool_t pg, const uint64_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP7]], <vscale x 8 x half> [[TMP8]], i64 24)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP9]]
 //
-svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base)
+svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_f16,,)(pg, base);
 }
@@ -336,7 +343,7 @@ svfloat16x4_t test_svld4_f16(svbool_t pg, const float16_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP7]], <vscale x 4 x float> [[TMP8]], i64 12)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP9]]
 //
-svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base)
+svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_f32,,)(pg, base);
 }
@@ -369,7 +376,7 @@ svfloat32x4_t test_svld4_f32(svbool_t pg, const float32_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP7]], <vscale x 2 x double> [[TMP8]], i64 6)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP9]]
 //
-svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base)
+svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4,_f64,,)(pg, base);
 }
@@ -402,7 +409,7 @@ svfloat64x4_t test_svld4_f64(svbool_t pg, const float64_t *base)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
 // CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
 //
-svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_s8,,)(pg, base, vnum);
 }
@@ -437,7 +444,7 @@ svint8x4_t test_svld4_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 24)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP10]]
 //
-svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_s16,,)(pg, base, vnum);
 }
@@ -472,7 +479,7 @@ svint16x4_t test_svld4_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 12)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP10]]
 //
-svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_s32,,)(pg, base, vnum);
 }
@@ -507,7 +514,7 @@ svint32x4_t test_svld4_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 6)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP10]]
 //
-svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_s64,,)(pg, base, vnum);
 }
@@ -540,7 +547,7 @@ svint64x4_t test_svld4_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP9:%.*]] = tail call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> [[TMP7]], <vscale x 16 x i8> [[TMP8]], i64 48)
 // CPP-CHECK-NEXT:    ret <vscale x 64 x i8> [[TMP9]]
 //
-svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_u8,,)(pg, base, vnum);
 }
@@ -575,7 +582,7 @@ svuint8x4_t test_svld4_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x i16> @llvm.vector.insert.nxv32i16.nxv8i16(<vscale x 32 x i16> [[TMP8]], <vscale x 8 x i16> [[TMP9]], i64 24)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i16> [[TMP10]]
 //
-svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_u16,,)(pg, base, vnum);
 }
@@ -610,7 +617,7 @@ svuint16x4_t test_svld4_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> [[TMP8]], <vscale x 4 x i32> [[TMP9]], i64 12)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i32> [[TMP10]]
 //
-svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_u32,,)(pg, base, vnum);
 }
@@ -645,7 +652,7 @@ svuint32x4_t test_svld4_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x i64> @llvm.vector.insert.nxv8i64.nxv2i64(<vscale x 8 x i64> [[TMP8]], <vscale x 2 x i64> [[TMP9]], i64 6)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i64> [[TMP10]]
 //
-svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_u64,,)(pg, base, vnum);
 }
@@ -680,7 +687,7 @@ svuint64x4_t test_svld4_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 32 x half> @llvm.vector.insert.nxv32f16.nxv8f16(<vscale x 32 x half> [[TMP8]], <vscale x 8 x half> [[TMP9]], i64 24)
 // CPP-CHECK-NEXT:    ret <vscale x 32 x half> [[TMP10]]
 //
-svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_f16,,)(pg, base, vnum);
 }
@@ -715,7 +722,7 @@ svfloat16x4_t test_svld4_vnum_f16(svbool_t pg, const float16_t *base, int64_t vn
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> [[TMP8]], <vscale x 4 x float> [[TMP9]], i64 12)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x float> [[TMP10]]
 //
-svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_f32,,)(pg, base, vnum);
 }
@@ -750,7 +757,7 @@ svfloat32x4_t test_svld4_vnum_f32(svbool_t pg, const float32_t *base, int64_t vn
 // CPP-CHECK-NEXT:    [[TMP10:%.*]] = tail call <vscale x 8 x double> @llvm.vector.insert.nxv8f64.nxv2f64(<vscale x 8 x double> [[TMP8]], <vscale x 2 x double> [[TMP9]], i64 6)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x double> [[TMP10]]
 //
-svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+svfloat64x4_t test_svld4_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svld4_vnum,_f64,,)(pg, base, vnum);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c
index ee0b46db5ebcf..82d5bff2516d8 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svldnt1_bf16(svbool_t pg, const bfloat16_t *base)
+svbfloat16_t test_svldnt1_bf16(svbool_t pg, const bfloat16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_bf16,,)(pg, base);
 }
@@ -46,7 +54,7 @@ svbfloat16_t test_svldnt1_bf16(svbool_t pg, const bfloat16_t *base)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svldnt1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum)
+svbfloat16_t test_svldnt1_vnum_bf16(svbool_t pg, const bfloat16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_bf16,,)(pg, base, vnum);
 }
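
[Note: every test file in this patch picks up the same mechanical change as the
two hunks above: a +sme RUN line, a MODE_ATTR macro, and MODE_ATTR on each test
function. A minimal stand-alone sketch of the pattern follows; the function name
is illustrative, not taken from the patch:

    #include <arm_sve.h>

    // Clang predefines __ARM_FEATURE_SME when compiling with
    // -target-feature +sme, so MODE_ATTR expands to __arm_streaming and
    // each test body compiles as a streaming function; under +sve it
    // expands to nothing and the tests build exactly as before.
    #if defined __ARM_FEATURE_SME
    #define MODE_ATTR __arm_streaming
    #else
    #define MODE_ATTR
    #endif

    // Illustrative test-style function (not from the patch):
    svint8_t load_nontemporal(svbool_t pg, const int8_t *base) MODE_ATTR {
      return svldnt1_s8(pg, base); // accepted in either mode after this patch
    }
]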
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
index 37a41d5fd4ed5..d343c124fe6a7 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ldnt1.c
@@ -5,9 +5,17 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svldnt1_s8(svbool_t pg, const int8_t *base)
+svint8_t test_svldnt1_s8(svbool_t pg, const int8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_s8,,)(pg, base);
 }
@@ -42,7 +50,7 @@ svint8_t test_svldnt1_s8(svbool_t pg, const int8_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svldnt1_s16(svbool_t pg, const int16_t *base)
+svint16_t test_svldnt1_s16(svbool_t pg, const int16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_s16,,)(pg, base);
 }
@@ -59,7 +67,7 @@ svint16_t test_svldnt1_s16(svbool_t pg, const int16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svldnt1_s32(svbool_t pg, const int32_t *base)
+svint32_t test_svldnt1_s32(svbool_t pg, const int32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_s32,,)(pg, base);
 }
@@ -76,7 +84,7 @@ svint32_t test_svldnt1_s32(svbool_t pg, const int32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svldnt1_s64(svbool_t pg, const int64_t *base)
+svint64_t test_svldnt1_s64(svbool_t pg, const int64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_s64,,)(pg, base);
 }
@@ -91,7 +99,7 @@ svint64_t test_svldnt1_s64(svbool_t pg, const int64_t *base)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svldnt1_u8(svbool_t pg, const uint8_t *base)
+svuint8_t test_svldnt1_u8(svbool_t pg, const uint8_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_u8,,)(pg, base);
 }
@@ -108,7 +116,7 @@ svuint8_t test_svldnt1_u8(svbool_t pg, const uint8_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svldnt1_u16(svbool_t pg, const uint16_t *base)
+svuint16_t test_svldnt1_u16(svbool_t pg, const uint16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_u16,,)(pg, base);
 }
@@ -125,7 +133,7 @@ svuint16_t test_svldnt1_u16(svbool_t pg, const uint16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svldnt1_u32(svbool_t pg, const uint32_t *base)
+svuint32_t test_svldnt1_u32(svbool_t pg, const uint32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_u32,,)(pg, base);
 }
@@ -142,7 +150,7 @@ svuint32_t test_svldnt1_u32(svbool_t pg, const uint32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svldnt1_u64(svbool_t pg, const uint64_t *base)
+svuint64_t test_svldnt1_u64(svbool_t pg, const uint64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_u64,,)(pg, base);
 }
@@ -159,7 +167,7 @@ svuint64_t test_svldnt1_u64(svbool_t pg, const uint64_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svldnt1_f16(svbool_t pg, const float16_t *base)
+svfloat16_t test_svldnt1_f16(svbool_t pg, const float16_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_f16,,)(pg, base);
 }
@@ -176,7 +184,7 @@ svfloat16_t test_svldnt1_f16(svbool_t pg, const float16_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svldnt1_f32(svbool_t pg, const float32_t *base)
+svfloat32_t test_svldnt1_f32(svbool_t pg, const float32_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_f32,,)(pg, base);
 }
@@ -193,7 +201,7 @@ svfloat32_t test_svldnt1_f32(svbool_t pg, const float32_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base)
+svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1,_f64,,)(pg, base);
 }
@@ -210,7 +218,7 @@ svfloat64_t test_svldnt1_f64(svbool_t pg, const float64_t *base)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svldnt1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
+svint8_t test_svldnt1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_s8,,)(pg, base, vnum);
 }
@@ -229,7 +237,7 @@ svint8_t test_svldnt1_vnum_s8(svbool_t pg, const int8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svldnt1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
+svint16_t test_svldnt1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_s16,,)(pg, base, vnum);
 }
@@ -248,7 +256,7 @@ svint16_t test_svldnt1_vnum_s16(svbool_t pg, const int16_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svldnt1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
+svint32_t test_svldnt1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_s32,,)(pg, base, vnum);
 }
@@ -267,7 +275,7 @@ svint32_t test_svldnt1_vnum_s32(svbool_t pg, const int32_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svldnt1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
+svint64_t test_svldnt1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_s64,,)(pg, base, vnum);
 }
@@ -284,7 +292,7 @@ svint64_t test_svldnt1_vnum_s64(svbool_t pg, const int64_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svldnt1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
+svuint8_t test_svldnt1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_u8,,)(pg, base, vnum);
 }
@@ -303,7 +311,7 @@ svuint8_t test_svldnt1_vnum_u8(svbool_t pg, const uint8_t *base, int64_t vnum)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svldnt1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum)
+svuint16_t test_svldnt1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_u16,,)(pg, base, vnum);
 }
@@ -322,7 +330,7 @@ svuint16_t test_svldnt1_vnum_u16(svbool_t pg, const uint16_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svldnt1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum)
+svuint32_t test_svldnt1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_u32,,)(pg, base, vnum);
 }
@@ -341,7 +349,7 @@ svuint32_t test_svldnt1_vnum_u32(svbool_t pg, const uint32_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svldnt1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum)
+svuint64_t test_svldnt1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_u64,,)(pg, base, vnum);
 }
@@ -360,7 +368,7 @@ svuint64_t test_svldnt1_vnum_u64(svbool_t pg, const uint64_t *base, int64_t vnum
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svldnt1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum)
+svfloat16_t test_svldnt1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_f16,,)(pg, base, vnum);
 }
@@ -379,7 +387,7 @@ svfloat16_t test_svldnt1_vnum_f16(svbool_t pg, const float16_t *base, int64_t vn
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svldnt1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum)
+svfloat32_t test_svldnt1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_f32,,)(pg, base, vnum);
 }
@@ -398,7 +406,7 @@ svfloat32_t test_svldnt1_vnum_f32(svbool_t pg, const float32_t *base, int64_t vn
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum)
+svfloat64_t test_svldnt1_vnum_f64(svbool_t pg, const float64_t *base, int64_t vnum) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svldnt1_vnum,_f64,,)(pg, base, vnum);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len-bfloat.c
index 1128a7310938e..049207514bc1d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len-bfloat.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_bf16(svbfloat16_t op)
+uint64_t test_svlen_bf16(svbfloat16_t op) MODE_ATTR
 {
  // expected-warning@+1 {{implicit declaration of function 'svlen_bf16'}}
   return SVE_ACLE_FUNC(svlen,_bf16,,)(op);
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c
index 10675a2cc08ce..cca939296455e 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_len.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_s8(svint8_t op)
+uint64_t test_svlen_s8(svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_s8,,)(op);
 }
@@ -43,7 +51,7 @@ uint64_t test_svlen_s8(svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_s16(svint16_t op)
+uint64_t test_svlen_s16(svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_s16,,)(op);
 }
@@ -60,7 +68,7 @@ uint64_t test_svlen_s16(svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_s32(svint32_t op)
+uint64_t test_svlen_s32(svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_s32,,)(op);
 }
@@ -77,7 +85,7 @@ uint64_t test_svlen_s32(svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_s64(svint64_t op)
+uint64_t test_svlen_s64(svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_s64,,)(op);
 }
@@ -94,7 +102,7 @@ uint64_t test_svlen_s64(svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 4
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_u8(svuint8_t op)
+uint64_t test_svlen_u8(svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_u8,,)(op);
 }
@@ -111,7 +119,7 @@ uint64_t test_svlen_u8(svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_u16(svuint16_t op)
+uint64_t test_svlen_u16(svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_u16,,)(op);
 }
@@ -128,7 +136,7 @@ uint64_t test_svlen_u16(svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_u32(svuint32_t op)
+uint64_t test_svlen_u32(svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_u32,,)(op);
 }
@@ -145,7 +153,7 @@ uint64_t test_svlen_u32(svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_u64(svuint64_t op)
+uint64_t test_svlen_u64(svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_u64,,)(op);
 }
@@ -162,7 +170,7 @@ uint64_t test_svlen_u64(svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_f16(svfloat16_t op)
+uint64_t test_svlen_f16(svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_f16,,)(op);
 }
@@ -179,7 +187,7 @@ uint64_t test_svlen_f16(svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_f32(svfloat32_t op)
+uint64_t test_svlen_f32(svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_f32,,)(op);
 }
@@ -196,7 +204,7 @@ uint64_t test_svlen_f32(svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 1
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svlen_f64(svfloat64_t op)
+uint64_t test_svlen_f64(svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlen,_f64,,)(op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsl.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsl.c
index 1c32eea466fd1..d916235059ea1 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsl.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsl.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svlsl_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2)
+svint8_t test_svlsl_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svlsl_s8_z(svbool_t pg, svint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svlsl_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2)
+svint16_t test_svlsl_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svlsl_s16_z(svbool_t pg, svint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svlsl_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2)
+svint32_t test_svlsl_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svlsl_s32_z(svbool_t pg, svint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svlsl_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2)
+svint64_t test_svlsl_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svlsl_s64_z(svbool_t pg, svint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svlsl_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svlsl_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svlsl_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svlsl_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svlsl_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svlsl_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svlsl_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svlsl_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svlsl_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svlsl_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svlsl_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svlsl_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svlsl_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2)
+svint8_t test_svlsl_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svlsl_s8_m(svbool_t pg, svint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svlsl_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2)
+svint16_t test_svlsl_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svlsl_s16_m(svbool_t pg, svint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svlsl_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2)
+svint32_t test_svlsl_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svlsl_s32_m(svbool_t pg, svint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svlsl_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2)
+svint64_t test_svlsl_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svlsl_s64_m(svbool_t pg, svint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsl_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svlsl_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svlsl_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsl_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svlsl_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svlsl_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsl_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svlsl_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svlsl_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svlsl_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svlsl_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svlsl_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svlsl_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2)
+svint8_t test_svlsl_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svlsl_s8_x(svbool_t pg, svint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svlsl_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2)
+svint16_t test_svlsl_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svlsl_s16_x(svbool_t pg, svint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svlsl_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2)
+svint32_t test_svlsl_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svlsl_s32_x(svbool_t pg, svint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svlsl_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2)
+svint64_t test_svlsl_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svlsl_s64_x(svbool_t pg, svint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsl_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svlsl_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svlsl_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsl_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svlsl_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svlsl_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsl_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svlsl_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svlsl_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svlsl_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svlsl_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl,_u64,_x,)(pg, op1, op2);
 }
@@ -438,7 +446,7 @@ svuint64_t test_svlsl_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svlsl_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2)
+svint8_t test_svlsl_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s8,_z,)(pg, op1, op2);
 }
@@ -457,7 +465,7 @@ svint8_t test_svlsl_wide_s8_z(svbool_t pg, svint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svlsl_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2)
+svint16_t test_svlsl_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s16,_z,)(pg, op1, op2);
 }
@@ -476,7 +484,7 @@ svint16_t test_svlsl_wide_s16_z(svbool_t pg, svint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svlsl_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2)
+svint32_t test_svlsl_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s32,_z,)(pg, op1, op2);
 }
@@ -493,7 +501,7 @@ svint32_t test_svlsl_wide_s32_z(svbool_t pg, svint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svlsl_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svuint8_t test_svlsl_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u8,_z,)(pg, op1, op2);
 }
@@ -512,7 +520,7 @@ svuint8_t test_svlsl_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svlsl_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svuint16_t test_svlsl_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u16,_z,)(pg, op1, op2);
 }
@@ -531,7 +539,7 @@ svuint16_t test_svlsl_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svlsl_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svuint32_t test_svlsl_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u32,_z,)(pg, op1, op2);
 }
@@ -546,7 +554,7 @@ svuint32_t test_svlsl_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svlsl_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2)
+svint8_t test_svlsl_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s8,_m,)(pg, op1, op2);
 }
@@ -563,7 +571,7 @@ svint8_t test_svlsl_wide_s8_m(svbool_t pg, svint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svlsl_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2)
+svint16_t test_svlsl_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s16,_m,)(pg, op1, op2);
 }
@@ -580,7 +588,7 @@ svint16_t test_svlsl_wide_s16_m(svbool_t pg, svint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svlsl_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2)
+svint32_t test_svlsl_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s32,_m,)(pg, op1, op2);
 }
@@ -595,7 +603,7 @@ svint32_t test_svlsl_wide_s32_m(svbool_t pg, svint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsl_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svuint8_t test_svlsl_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u8,_m,)(pg, op1, op2);
 }
@@ -612,7 +620,7 @@ svuint8_t test_svlsl_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsl_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svuint16_t test_svlsl_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u16,_m,)(pg, op1, op2);
 }
@@ -629,7 +637,7 @@ svuint16_t test_svlsl_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsl_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svuint32_t test_svlsl_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u32,_m,)(pg, op1, op2);
 }
@@ -644,7 +652,7 @@ svuint32_t test_svlsl_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svlsl_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2)
+svint8_t test_svlsl_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s8,_x,)(pg, op1, op2);
 }
@@ -661,7 +669,7 @@ svint8_t test_svlsl_wide_s8_x(svbool_t pg, svint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svlsl_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2)
+svint16_t test_svlsl_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s16,_x,)(pg, op1, op2);
 }
@@ -678,7 +686,7 @@ svint16_t test_svlsl_wide_s16_x(svbool_t pg, svint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svlsl_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2)
+svint32_t test_svlsl_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_s32,_x,)(pg, op1, op2);
 }
@@ -693,7 +701,7 @@ svint32_t test_svlsl_wide_s32_x(svbool_t pg, svint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsl_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svuint8_t test_svlsl_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u8,_x,)(pg, op1, op2);
 }
@@ -710,7 +718,7 @@ svuint8_t test_svlsl_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsl_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svuint16_t test_svlsl_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u16,_x,)(pg, op1, op2);
 }
@@ -727,7 +735,7 @@ svuint16_t test_svlsl_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsl_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svuint32_t test_svlsl_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_u32,_x,)(pg, op1, op2);
 }
@@ -746,7 +754,7 @@ svuint32_t test_svlsl_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svlsl_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2)
+svint8_t test_svlsl_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s8,_m,)(pg, op1, op2);
 }
@@ -767,7 +775,7 @@ svint8_t test_svlsl_wide_n_s8_m(svbool_t pg, svint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svlsl_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2)
+svint16_t test_svlsl_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s16,_m,)(pg, op1, op2);
 }
@@ -788,7 +796,7 @@ svint16_t test_svlsl_wide_n_s16_m(svbool_t pg, svint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svlsl_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2)
+svint32_t test_svlsl_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s32,_m,)(pg, op1, op2);
 }
@@ -809,7 +817,7 @@ svint32_t test_svlsl_wide_n_s32_m(svbool_t pg, svint32_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svlsl_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2)
+svint8_t test_svlsl_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s8,_z,)(pg, op1, op2);
 }
@@ -832,7 +840,7 @@ svint8_t test_svlsl_wide_n_s8_z(svbool_t pg, svint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svlsl_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2)
+svint16_t test_svlsl_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s16,_z,)(pg, op1, op2);
 }
@@ -855,7 +863,7 @@ svint16_t test_svlsl_wide_n_s16_z(svbool_t pg, svint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svlsl_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2)
+svint32_t test_svlsl_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s32,_z,)(pg, op1, op2);
 }
@@ -874,7 +882,7 @@ svint32_t test_svlsl_wide_n_s32_z(svbool_t pg, svint32_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svlsl_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2)
+svint8_t test_svlsl_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s8,_x,)(pg, op1, op2);
 }
@@ -895,7 +903,7 @@ svint8_t test_svlsl_wide_n_s8_x(svbool_t pg, svint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svlsl_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2)
+svint16_t test_svlsl_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s16,_x,)(pg, op1, op2);
 }
@@ -916,7 +924,7 @@ svint16_t test_svlsl_wide_n_s16_x(svbool_t pg, svint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svlsl_wide_n_s32_x(svbool_t pg, svint32_t op1, uint64_t op2)
+svint32_t test_svlsl_wide_n_s32_x(svbool_t pg, svint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsl_wide,_n_s32,_x,)(pg, op1, op2);
 }
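
[Note: as a reading aid for the SVE_ACLE_FUNC calls in these tests, the macro
pastes its arguments into either the overloaded or the fully-suffixed intrinsic
name. A minimal sketch; the non-overloaded branch is the tests' usual
counterpart and is assumed here rather than shown in these hunks:

    #ifdef SVE_OVERLOADED_FORMS
    #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
    #else
    #define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
    #endif

    // SVE_ACLE_FUNC(svlsl,_s8,_z,)(pg, op1, op2) expands to:
    //   svlsl_z(pg, op1, op2)     with -DSVE_OVERLOADED_FORMS
    //   svlsl_s8_z(pg, op1, op2)  otherwise
]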
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsr.c
index 5efba57d45419..6244a851619f5 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsr.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lsr.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svlsr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svlsr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svuint8_t test_svlsr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svlsr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svlsr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svuint16_t test_svlsr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svlsr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svlsr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svuint32_t test_svlsr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svlsr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svlsr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u64,_z,)(pg, op1, op2);
 }
@@ -98,7 +106,7 @@ svuint64_t test_svlsr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svlsr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u8,_m,)(pg, op1, op2);
 }
@@ -115,7 +123,7 @@ svuint8_t test_svlsr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svlsr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u16,_m,)(pg, op1, op2);
 }
@@ -132,7 +140,7 @@ svuint16_t test_svlsr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svlsr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u32,_m,)(pg, op1, op2);
 }
@@ -149,7 +157,7 @@ svuint32_t test_svlsr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svlsr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svlsr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u64,_m,)(pg, op1, op2);
 }
@@ -164,7 +172,7 @@ svuint64_t test_svlsr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svlsr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u8,_x,)(pg, op1, op2);
 }
@@ -181,7 +189,7 @@ svuint8_t test_svlsr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svlsr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u16,_x,)(pg, op1, op2);
 }
@@ -198,7 +206,7 @@ svuint16_t test_svlsr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svlsr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u32,_x,)(pg, op1, op2);
 }
@@ -215,7 +223,7 @@ svuint32_t test_svlsr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svlsr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svlsr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr,_u64,_x,)(pg, op1, op2);
 }
@@ -232,7 +240,7 @@ svuint64_t test_svlsr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svlsr_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svuint8_t test_svlsr_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u8,_z,)(pg, op1, op2);
 }
@@ -251,7 +259,7 @@ svuint8_t test_svlsr_wide_u8_z(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svlsr_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svuint16_t test_svlsr_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u16,_z,)(pg, op1, op2);
 }
@@ -270,7 +278,7 @@ svuint16_t test_svlsr_wide_u16_z(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svlsr_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svuint32_t test_svlsr_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u32,_z,)(pg, op1, op2);
 }
@@ -285,7 +293,7 @@ svuint32_t test_svlsr_wide_u32_z(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsr_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svuint8_t test_svlsr_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u8,_m,)(pg, op1, op2);
 }
@@ -302,7 +310,7 @@ svuint8_t test_svlsr_wide_u8_m(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsr_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svuint16_t test_svlsr_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u16,_m,)(pg, op1, op2);
 }
@@ -319,7 +327,7 @@ svuint16_t test_svlsr_wide_u16_m(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsr_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svuint32_t test_svlsr_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u32,_m,)(pg, op1, op2);
 }
@@ -334,7 +342,7 @@ svuint32_t test_svlsr_wide_u32_m(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsr_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2)
+svuint8_t test_svlsr_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u8,_x,)(pg, op1, op2);
 }
@@ -351,7 +359,7 @@ svuint8_t test_svlsr_wide_u8_x(svbool_t pg, svuint8_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsr_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2)
+svuint16_t test_svlsr_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u16,_x,)(pg, op1, op2);
 }
@@ -368,7 +376,7 @@ svuint16_t test_svlsr_wide_u16_x(svbool_t pg, svuint16_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsr_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2)
+svuint32_t test_svlsr_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_u32,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint32_t test_svlsr_wide_u32_x(svbool_t pg, svuint32_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsr_wide_n_u8_m(svbool_t pg, svuint8_t op1, uint64_t op2)
+svuint8_t test_svlsr_wide_n_u8_m(svbool_t pg, svuint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u8,_m,)(pg, op1, op2);
 }
@@ -408,7 +416,7 @@ svuint8_t test_svlsr_wide_n_u8_m(svbool_t pg, svuint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsr_wide_n_u16_m(svbool_t pg, svuint16_t op1, uint64_t op2)
+svuint16_t test_svlsr_wide_n_u16_m(svbool_t pg, svuint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u16,_m,)(pg, op1, op2);
 }
@@ -429,7 +437,7 @@ svuint16_t test_svlsr_wide_n_u16_m(svbool_t pg, svuint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsr_wide_n_u32_m(svbool_t pg, svuint32_t op1, uint64_t op2)
+svuint32_t test_svlsr_wide_n_u32_m(svbool_t pg, svuint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u32,_m,)(pg, op1, op2);
 }
@@ -450,7 +458,7 @@ svuint32_t test_svlsr_wide_n_u32_m(svbool_t pg, svuint32_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svlsr_wide_n_u8_z(svbool_t pg, svuint8_t op1, uint64_t op2)
+svuint8_t test_svlsr_wide_n_u8_z(svbool_t pg, svuint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u8,_z,)(pg, op1, op2);
 }
@@ -473,7 +481,7 @@ svuint8_t test_svlsr_wide_n_u8_z(svbool_t pg, svuint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svlsr_wide_n_u16_z(svbool_t pg, svuint16_t op1, uint64_t op2)
+svuint16_t test_svlsr_wide_n_u16_z(svbool_t pg, svuint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u16,_z,)(pg, op1, op2);
 }
@@ -496,7 +504,7 @@ svuint16_t test_svlsr_wide_n_u16_z(svbool_t pg, svuint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svlsr_wide_n_u32_z(svbool_t pg, svuint32_t op1, uint64_t op2)
+svuint32_t test_svlsr_wide_n_u32_z(svbool_t pg, svuint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u32,_z,)(pg, op1, op2);
 }
@@ -515,7 +523,7 @@ svuint32_t test_svlsr_wide_n_u32_z(svbool_t pg, svuint32_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svlsr_wide_n_u8_x(svbool_t pg, svuint8_t op1, uint64_t op2)
+svuint8_t test_svlsr_wide_n_u8_x(svbool_t pg, svuint8_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u8,_x,)(pg, op1, op2);
 }
@@ -536,7 +544,7 @@ svuint8_t test_svlsr_wide_n_u8_x(svbool_t pg, svuint8_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svlsr_wide_n_u16_x(svbool_t pg, svuint16_t op1, uint64_t op2)
+svuint16_t test_svlsr_wide_n_u16_x(svbool_t pg, svuint16_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u16,_x,)(pg, op1, op2);
 }
@@ -557,7 +565,7 @@ svuint16_t test_svlsr_wide_n_u16_x(svbool_t pg, svuint16_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svlsr_wide_n_u32_x(svbool_t pg, svuint32_t op1, uint64_t op2)
+svuint32_t test_svlsr_wide_n_u32_x(svbool_t pg, svuint32_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svlsr_wide,_n_u32,_x,)(pg, op1, op2);
 }
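
For anyone skimming the repeated hunks: each test file in this patch gets the same three-part change, visible above for acle_sve_lsr.c. A new RUN line compiles the file with -target-feature +sme and no +sve, a MODE_ATTR macro expands to __arm_streaming exactly when __ARM_FEATURE_SME is defined, and every test function is suffixed with MODE_ATTR so that, in the +sme-only configuration, the SVE intrinsic is called from a streaming function, the mode in which these shared intrinsics are valid without +sve. A minimal self-contained sketch of the pattern follows; it is not part of the patch, and the function name sketch_lsr_u32_x is illustrative only:

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming   // +sme only: test body must run in streaming mode
#else
#define MODE_ATTR                   // +sve: no attribute required
#endif

// Hypothetical example mirroring the tests above; not part of the patch.
svuint32_t sketch_lsr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
{
  return svlsr_u32_x(pg, op1, op2); // SVE intrinsic accepted in both configurations
}

With -target-feature +sve this compiles as before; with -target-feature +sme it is accepted because the enclosing function is marked __arm_streaming.
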
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mad.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mad.c
index 0070faba95e34..f3d286689a864 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mad.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mad.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmad_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmad_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s8,_z,)(pg, op1, op2, op3);
 }
@@ -45,7 +53,7 @@ svint8_t test_svmad_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmad_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmad_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s16,_z,)(pg, op1, op2, op3);
 }
@@ -64,7 +72,7 @@ svint16_t test_svmad_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmad_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmad_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s32,_z,)(pg, op1, op2, op3);
 }
@@ -83,7 +91,7 @@ svint32_t test_svmad_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmad_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmad_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s64,_z,)(pg, op1, op2, op3);
 }
@@ -100,7 +108,7 @@ svint64_t test_svmad_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmad_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmad_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u8,_z,)(pg, op1, op2, op3);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svmad_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmad_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmad_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u16,_z,)(pg, op1, op2, op3);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svmad_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmad_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmad_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u32,_z,)(pg, op1, op2, op3);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svmad_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmad_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmad_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u64,_z,)(pg, op1, op2, op3);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svmad_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmad_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmad_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s8,_m,)(pg, op1, op2, op3);
 }
@@ -189,7 +197,7 @@ svint8_t test_svmad_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmad_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmad_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s16,_m,)(pg, op1, op2, op3);
 }
@@ -206,7 +214,7 @@ svint16_t test_svmad_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmad_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmad_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s32,_m,)(pg, op1, op2, op3);
 }
@@ -223,7 +231,7 @@ svint32_t test_svmad_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmad_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmad_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s64,_m,)(pg, op1, op2, op3);
 }
@@ -238,7 +246,7 @@ svint64_t test_svmad_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmad_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmad_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u8,_m,)(pg, op1, op2, op3);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svmad_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmad_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmad_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u16,_m,)(pg, op1, op2, op3);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svmad_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmad_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmad_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u32,_m,)(pg, op1, op2, op3);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svmad_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmad_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmad_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u64,_m,)(pg, op1, op2, op3);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svmad_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmad_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmad_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s8,_x,)(pg, op1, op2, op3);
 }
@@ -321,7 +329,7 @@ svint8_t test_svmad_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP3:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmad_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmad_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s16,_x,)(pg, op1, op2, op3);
 }
@@ -338,7 +346,7 @@ svint16_t test_svmad_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP3:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmad_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmad_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s32,_x,)(pg, op1, op2, op3);
 }
@@ -355,7 +363,7 @@ svint32_t test_svmad_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP3:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmad_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmad_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_s64,_x,)(pg, op1, op2, op3);
 }
@@ -370,7 +378,7 @@ svint64_t test_svmad_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmad_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmad_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u8,_x,)(pg, op1, op2, op3);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svmad_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP3:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmad_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmad_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u16,_x,)(pg, op1, op2, op3);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svmad_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP3:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmad_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmad_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u32,_x,)(pg, op1, op2, op3);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svmad_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP3:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmad_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmad_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_u64,_x,)(pg, op1, op2, op3);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svmad_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmad_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmad_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s8,_z,)(pg, op1, op2, op3);
 }
@@ -465,7 +473,7 @@ svint8_t test_svmad_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmad_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmad_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s16,_z,)(pg, op1, op2, op3);
 }
@@ -488,7 +496,7 @@ svint16_t test_svmad_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmad_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmad_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s32,_z,)(pg, op1, op2, op3);
 }
@@ -511,7 +519,7 @@ svint32_t test_svmad_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmad_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmad_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s64,_z,)(pg, op1, op2, op3);
 }
@@ -532,7 +540,7 @@ svint64_t test_svmad_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmad_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmad_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u8,_z,)(pg, op1, op2, op3);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svmad_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmad_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmad_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u16,_z,)(pg, op1, op2, op3);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svmad_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmad_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmad_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u32,_z,)(pg, op1, op2, op3);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svmad_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmad_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmad_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u64,_z,)(pg, op1, op2, op3);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svmad_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmad_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmad_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s8,_m,)(pg, op1, op2, op3);
 }
@@ -641,7 +649,7 @@ svint8_t test_svmad_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmad_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmad_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s16,_m,)(pg, op1, op2, op3);
 }
@@ -662,7 +670,7 @@ svint16_t test_svmad_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmad_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmad_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s32,_m,)(pg, op1, op2, op3);
 }
@@ -683,7 +691,7 @@ svint32_t test_svmad_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmad_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmad_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s64,_m,)(pg, op1, op2, op3);
 }
@@ -702,7 +710,7 @@ svint64_t test_svmad_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mad.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmad_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmad_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u8,_m,)(pg, op1, op2, op3);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svmad_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mad.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmad_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmad_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u16,_m,)(pg, op1, op2, op3);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svmad_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmad_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmad_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u32,_m,)(pg, op1, op2, op3);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svmad_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mad.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmad_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmad_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u64,_m,)(pg, op1, op2, op3);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svmad_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmad_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmad_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s8,_x,)(pg, op1, op2, op3);
 }
@@ -805,7 +813,7 @@ svint8_t test_svmad_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmad_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmad_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s16,_x,)(pg, op1, op2, op3);
 }
@@ -826,7 +834,7 @@ svint16_t test_svmad_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmad_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmad_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s32,_x,)(pg, op1, op2, op3);
 }
@@ -847,7 +855,7 @@ svint32_t test_svmad_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmad_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmad_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_s64,_x,)(pg, op1, op2, op3);
 }
@@ -866,7 +874,7 @@ svint64_t test_svmad_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmad_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmad_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u8,_x,)(pg, op1, op2, op3);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svmad_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmad_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmad_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u16,_x,)(pg, op1, op2, op3);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svmad_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmad_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmad_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u32,_x,)(pg, op1, op2, op3);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svmad_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmad_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmad_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_u64,_x,)(pg, op1, op2, op3);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svmad_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f16,_z,)(pg, op1, op2, op3);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f32,_z,)(pg, op1, op2, op3);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f64,_z,)(pg, op1, op2, op3);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f16,_m,)(pg, op1, op2, op3);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f32,_m,)(pg, op1, op2, op3);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f64,_m,)(pg, op1, op2, op3);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP3:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f16,_x,)(pg, op1, op2, op3);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP3:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f32,_x,)(pg, op1, op2, op3);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP3:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_f64,_x,)(pg, op1, op2, op3);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f16,_z,)(pg, op1, op2, op3);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f32,_z,)(pg, op1, op2, op3);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f64,_z,)(pg, op1, op2, op3);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmad.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f16,_m,)(pg, op1, op2, op3);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f32,_m,)(pg, op1, op2, op3);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmad.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f64,_m,)(pg, op1, op2, op3);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[DOTSPLAT]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f16,_x,)(pg, op1, op2, op3);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[DOTSPLAT]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f32,_x,)(pg, op1, op2, op3);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[DOTSPLAT]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmad,_n_f64,_x,)(pg, op1, op2, op3);
 }
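
The change that repeats throughout the test files below is mechanical: each file gains a RUN line that compiles with only +sme, and a MODE_ATTR macro that expands to __arm_streaming whenever __ARM_FEATURE_SME is defined, so every test function builds as a streaming function under +sme and as an ordinary function under +sve. A minimal standalone sketch of the pattern (the function name is illustrative, not taken from the patch):

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
#endif

// Under +sme (with no +sve), MODE_ATTR expands to __arm_streaming, making
// this a streaming function; the intrinsic call is accepted in both modes
// once its definition carries the VerifyRuntimeMode flag. Under +sve the
// attribute disappears entirely and the test behaves exactly as before.
svint8_t max_active_z(svbool_t pg, svint8_t a, svint8_t b) MODE_ATTR
{
  return svmax_s8_z(pg, a, b);
}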
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_max.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_max.c
index 2cf6cf3439b0d..1a8ee6ee425c5 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_max.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_max.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmax_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmax_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svmax_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmax_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmax_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svmax_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmax_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmax_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svmax_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmax_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmax_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svmax_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmax_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmax_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svmax_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmax_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmax_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svmax_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmax_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmax_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svmax_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmax_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmax_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svmax_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmax_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmax_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svmax_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmax_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmax_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svmax_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmax_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmax_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svmax_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmax_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmax_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svmax_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmax_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmax_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svmax_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmax_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmax_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svmax_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmax_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmax_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svmax_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmax_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmax_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svmax_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmax_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmax_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svmax_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmax_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmax_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svmax_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmax_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmax_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svmax_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmax_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmax_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svmax_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmax_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmax_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svmax_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmax_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmax_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svmax_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmax_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmax_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svmax_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmax_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmax_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svmax_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmax_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmax_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svmax_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmax_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmax_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svmax_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmax_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmax_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svmax_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmax_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmax_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svmax_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmax_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmax_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svmax_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmax_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmax_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svmax_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmax_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmax_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svmax_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmax_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmax_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svmax_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmax_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmax_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svmax_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmax_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmax_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svmax_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmax_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmax_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svmax_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmax_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmax_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svmax_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmax_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmax_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svmax_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmax_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmax_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svmax_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmax_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmax_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svmax_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmax_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmax_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svmax_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmax_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmax_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svmax_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmax_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmax_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svmax_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmax_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmax_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svmax_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmax_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmax_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svmax_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmax_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmax_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svmax_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmax_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmax_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svmax_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmax_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmax_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svmax_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmax_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmax_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_u64,_x,)(pg, op1, op2);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svmax_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmax_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmax_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f16,_z,)(pg, op1, op2);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svmax_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmax_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmax_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f32,_z,)(pg, op1, op2);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svmax_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmax_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmax_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f64,_z,)(pg, op1, op2);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svmax_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmax_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmax_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f16,_m,)(pg, op1, op2);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svmax_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmax_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmax_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f32,_m,)(pg, op1, op2);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svmax_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmax_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmax_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f64,_m,)(pg, op1, op2);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svmax_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmax_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmax_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f16,_x,)(pg, op1, op2);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svmax_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmax_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmax_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f32,_x,)(pg, op1, op2);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svmax_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmax_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmax_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_f64,_x,)(pg, op1, op2);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svmax_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmax_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmax_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f16,_z,)(pg, op1, op2);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svmax_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmax_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmax_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f32,_z,)(pg, op1, op2);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svmax_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmax_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmax_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f64,_z,)(pg, op1, op2);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svmax_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmax_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmax_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f16,_m,)(pg, op1, op2);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svmax_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmax_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmax_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f32,_m,)(pg, op1, op2);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svmax_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmax_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmax_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f64,_m,)(pg, op1, op2);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svmax_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmax_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmax_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f16,_x,)(pg, op1, op2);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svmax_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmax_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmax_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f32,_x,)(pg, op1, op2);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svmax_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmax_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmax_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmax,_n_f64,_x,)(pg, op1, op2);
 }
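
Note what the new RUN line actually verifies: like the pre-existing +sve sanity line, it uses -S with -o /dev/null, so it only checks that the intrinsic calls are accepted when building with +sme alone (here from __arm_streaming functions); the FileCheck-verified IR still comes from the +sve RUN lines. At the user level this corresponds to compiling SVE intrinsic code for an SME-only configuration, roughly as below (the target triple, -march string, and function name are illustrative, not from the patch):

// streaming.c -- built with something like:
//   clang --target=aarch64-none-elf -march=armv9-a+sme+nosve -c streaming.c
#include <arm_sve.h>

svfloat32_t scaled_max(svbool_t pg, svfloat32_t v, float s) __arm_streaming
{
  // svmax_n_f32_x is one of the streaming-compatible SVE intrinsics this
  // patch makes available under +sme without +sve.
  return svmax_n_f32_x(pg, v, s);
}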
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnm.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnm.c
index 530717887d398..dc6f56b25b6d9 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnm.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnm.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmaxnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmaxnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f16,_z,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svmaxnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmaxnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmaxnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f32,_z,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svfloat32_t test_svmaxnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmaxnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmaxnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f64,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svfloat64_t test_svmaxnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmaxnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmaxnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f16,_m,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svfloat16_t test_svmaxnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmaxnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmaxnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f32,_m,)(pg, op1, op2);
 }
@@ -117,7 +125,7 @@ svfloat32_t test_svmaxnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmaxnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmaxnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f64,_m,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svfloat64_t test_svmaxnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmaxnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmaxnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f16,_x,)(pg, op1, op2);
 }
@@ -151,7 +159,7 @@ svfloat16_t test_svmaxnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmaxnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmaxnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f32,_x,)(pg, op1, op2);
 }
@@ -168,7 +176,7 @@ svfloat32_t test_svmaxnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmaxnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmaxnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_f64,_x,)(pg, op1, op2);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svmaxnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmaxnm_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmaxnm_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f16,_z,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svfloat16_t test_svmaxnm_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmaxnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmaxnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f32,_z,)(pg, op1, op2);
 }
@@ -237,7 +245,7 @@ svfloat32_t test_svmaxnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmaxnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmaxnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f64,_z,)(pg, op1, op2);
 }
@@ -258,7 +266,7 @@ svfloat64_t test_svmaxnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmaxnm_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmaxnm_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f16,_m,)(pg, op1, op2);
 }
@@ -279,7 +287,7 @@ svfloat16_t test_svmaxnm_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmaxnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmaxnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f32,_m,)(pg, op1, op2);
 }
@@ -300,7 +308,7 @@ svfloat32_t test_svmaxnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmaxnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmaxnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f64,_m,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svfloat64_t test_svmaxnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmaxnm_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmaxnm_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f16,_x,)(pg, op1, op2);
 }
@@ -342,7 +350,7 @@ svfloat16_t test_svmaxnm_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmaxnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmaxnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f32,_x,)(pg, op1, op2);
 }
@@ -363,7 +371,7 @@ svfloat32_t test_svmaxnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmaxnm_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmaxnm_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnm,_n_f64,_x,)(pg, op1, op2);
 }
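
A note on the SVE_ACLE_FUNC macro these tests rely on: with SVE_OVERLOADED_FORMS defined it pastes only the base name and the predication suffix, producing the overloaded intrinsic name; the #else branch is elided from the hunks above, but its usual shape in these tests is sketched here:

#ifdef SVE_OVERLOADED_FORMS
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
#else
// Assumed #else branch; not shown in the hunks above.
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

// So SVE_ACLE_FUNC(svmaxnm,_f32,_z,)(pg, op1, op2) expands to
//   svmaxnm_z(pg, op1, op2)     with -DSVE_OVERLOADED_FORMS, and to
//   svmaxnm_f32_z(pg, op1, op2) otherwise.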
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnmv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnmv.c
index 803bce2ee72c1..fd34dc853c342 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnmv.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxnmv.c
@@ -5,9 +5,17 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.fmaxnmv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svmaxnmv_f16(svbool_t pg, svfloat16_t op)
+float16_t test_svmaxnmv_f16(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnmv,_f16,,)(pg, op);
 }
@@ -44,7 +52,7 @@ float16_t test_svmaxnmv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svmaxnmv_f32(svbool_t pg, svfloat32_t op)
+float32_t test_svmaxnmv_f32(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnmv,_f32,,)(pg, op);
 }
@@ -61,7 +69,7 @@ float32_t test_svmaxnmv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.fmaxnmv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svmaxnmv_f64(svbool_t pg, svfloat64_t op)
+float64_t test_svmaxnmv_f64(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxnmv,_f64,,)(pg, op);
 }
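
svmaxnmv above and svmaxv below are horizontal reductions: they fold the active lanes of a vector into a single scalar, which is why these tests return scalar types (float16_t, int8_t, ...) and the matched IR calls return plain half, i8, and so on; the fmaxnmv variant follows FMAXNM semantics and prefers a number over a quiet NaN. A hedged usage sketch (the loop structure and names are illustrative, not from the patch):

#include <arm_sve.h>
#include <stdint.h>

int8_t buffer_max(const int8_t *p, int64_t n)
{
  int8_t m = INT8_MIN;
  for (int64_t i = 0; i < n; i += svcntb()) {
    // Predicate covers lanes i..n-1, capped at the hardware vector length.
    svbool_t pg = svwhilelt_b8(i, n);
    // Reduce the active lanes of this chunk to one scalar.
    int8_t lane_max = svmaxv_s8(pg, svld1_s8(pg, p + i));
    if (lane_max > m)
      m = lane_max;
  }
  return m;
}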
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxv.c
index a49e6cb669c83..1308cd6f852c0 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxv.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_maxv.c
@@ -5,9 +5,17 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.smaxv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_svmaxv_s8(svbool_t pg, svint8_t op)
+int8_t test_svmaxv_s8(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_s8,,)(pg, op);
 }
@@ -42,7 +50,7 @@ int8_t test_svmaxv_s8(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-int16_t test_svmaxv_s16(svbool_t pg, svint16_t op)
+int16_t test_svmaxv_s16(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_s16,,)(pg, op);
 }
@@ -59,7 +67,7 @@ int16_t test_svmaxv_s16(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.smaxv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svmaxv_s32(svbool_t pg, svint32_t op)
+int32_t test_svmaxv_s32(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_s32,,)(pg, op);
 }
@@ -76,7 +84,7 @@ int32_t test_svmaxv_s32(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.smaxv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svmaxv_s64(svbool_t pg, svint64_t op)
+int64_t test_svmaxv_s64(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_s64,,)(pg, op);
 }
@@ -91,7 +99,7 @@ int64_t test_svmaxv_s64(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.umaxv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_svmaxv_u8(svbool_t pg, svuint8_t op)
+uint8_t test_svmaxv_u8(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_u8,,)(pg, op);
 }
@@ -108,7 +116,7 @@ uint8_t test_svmaxv_u8(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.umaxv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-uint16_t test_svmaxv_u16(svbool_t pg, svuint16_t op)
+uint16_t test_svmaxv_u16(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_u16,,)(pg, op);
 }
@@ -125,7 +133,7 @@ uint16_t test_svmaxv_u16(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.umaxv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svmaxv_u32(svbool_t pg, svuint32_t op)
+uint32_t test_svmaxv_u32(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_u32,,)(pg, op);
 }
@@ -142,7 +150,7 @@ uint32_t test_svmaxv_u32(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.umaxv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svmaxv_u64(svbool_t pg, svuint64_t op)
+uint64_t test_svmaxv_u64(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_u64,,)(pg, op);
 }
@@ -159,7 +167,7 @@ uint64_t test_svmaxv_u64(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.fmaxv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svmaxv_f16(svbool_t pg, svfloat16_t op)
+float16_t test_svmaxv_f16(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_f16,,)(pg, op);
 }
@@ -176,7 +184,7 @@ float16_t test_svmaxv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.fmaxv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svmaxv_f32(svbool_t pg, svfloat32_t op)
+float32_t test_svmaxv_f32(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_f32,,)(pg, op);
 }
@@ -193,7 +201,7 @@ float32_t test_svmaxv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.fmaxv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svmaxv_f64(svbool_t pg, svfloat64_t op)
+float64_t test_svmaxv_f64(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmaxv,_f64,,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_min.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_min.c
index 80c3dd15e8bd2..ba2dc81267e17 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_min.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_min.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmin_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmin_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svmin_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmin_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmin_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svmin_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmin_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmin_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svmin_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmin_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmin_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svmin_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmin_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmin_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svmin_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmin_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmin_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svmin_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmin_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmin_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svmin_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmin_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmin_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svmin_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmin_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmin_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svmin_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmin_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmin_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svmin_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmin_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmin_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svmin_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmin_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmin_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svmin_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmin_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmin_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svmin_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmin_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmin_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svmin_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmin_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmin_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svmin_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmin_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmin_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svmin_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmin_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmin_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svmin_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmin_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmin_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svmin_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmin_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmin_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svmin_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmin_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmin_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svmin_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmin_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmin_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svmin_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmin_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmin_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svmin_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmin_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmin_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svmin_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmin_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmin_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svmin_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmin_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmin_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svmin_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmin_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmin_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svmin_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmin_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmin_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svmin_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmin_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmin_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svmin_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmin_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmin_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svmin_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmin_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmin_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svmin_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmin_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmin_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svmin_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmin_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmin_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svmin_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmin_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmin_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svmin_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmin_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmin_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svmin_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmin_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmin_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svmin_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmin_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmin_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svmin_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmin_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmin_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svmin_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmin_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmin_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svmin_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmin_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmin_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svmin_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmin_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmin_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svmin_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmin_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmin_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svmin_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmin_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmin_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svmin_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmin_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmin_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svmin_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmin_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmin_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svmin_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmin_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmin_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svmin_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmin_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmin_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svmin_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmin_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmin_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svmin_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmin_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmin_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_u64,_x,)(pg, op1, op2);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svmin_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmin_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmin_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f16,_z,)(pg, op1, op2);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svmin_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmin_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmin_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f32,_z,)(pg, op1, op2);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svmin_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmin_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmin_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f64,_z,)(pg, op1, op2);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svmin_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmin_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmin_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f16,_m,)(pg, op1, op2);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svmin_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmin_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmin_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f32,_m,)(pg, op1, op2);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svmin_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmin_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmin_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f64,_m,)(pg, op1, op2);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svmin_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmin_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmin_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f16,_x,)(pg, op1, op2);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svmin_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmin_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmin_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f32,_x,)(pg, op1, op2);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svmin_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmin_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmin_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_f64,_x,)(pg, op1, op2);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svmin_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmin_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmin_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f16,_z,)(pg, op1, op2);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svmin_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmin_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmin_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f32,_z,)(pg, op1, op2);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svmin_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmin_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmin_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f64,_z,)(pg, op1, op2);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svmin_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmin_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmin_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f16,_m,)(pg, op1, op2);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svmin_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmin_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmin_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f32,_m,)(pg, op1, op2);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svmin_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmin_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmin_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f64,_m,)(pg, op1, op2);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svmin_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmin_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmin_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f16,_x,)(pg, op1, op2);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svmin_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmin_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmin_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f32,_x,)(pg, op1, op2);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svmin_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmin_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmin_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmin,_n_f64,_x,)(pg, op1, op2);
 }
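
(Aside, not part of the patch: on the SVE_ACLE_FUNC indirection used
throughout — a sketch under my reading of the macro. With
SVE_OVERLOADED_FORMS defined it pastes A1##A3, so the tests call the
type-overloaded names; otherwise the fully-suffixed names are used. Both
spellings resolve to the same builtin, so each hunk covers both. MODE_ATTR
is assumed defined as in the tests; the function names are illustrative:)

    // Overloaded form, exercised when SVE_OVERLOADED_FORMS is defined:
    svint8_t min_overloaded(svbool_t pg, svint8_t a, svint8_t b) MODE_ATTR {
      return svmin_z(pg, a, b);
    }

    // Fully-suffixed form, exercised otherwise:
    svint8_t min_suffixed(svbool_t pg, svint8_t a, svint8_t b) MODE_ATTR {
      return svmin_s8_z(pg, a, b);
    }
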
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnm.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnm.c
index 127294f939afb..631273f72a658 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnm.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnm.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svminnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svminnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f16,_z,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svminnm_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svminnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svminnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f32,_z,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svfloat32_t test_svminnm_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svminnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svminnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f64,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svfloat64_t test_svminnm_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svminnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svminnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f16,_m,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svfloat16_t test_svminnm_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svminnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svminnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f32,_m,)(pg, op1, op2);
 }
@@ -117,7 +125,7 @@ svfloat32_t test_svminnm_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svminnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svminnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f64,_m,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svfloat64_t test_svminnm_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svminnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svminnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f16,_x,)(pg, op1, op2);
 }
@@ -151,7 +159,7 @@ svfloat16_t test_svminnm_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svminnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svminnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f32,_x,)(pg, op1, op2);
 }
@@ -168,7 +176,7 @@ svfloat32_t test_svminnm_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svminnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svminnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_f64,_x,)(pg, op1, op2);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svminnm_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svminnm_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svminnm_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f16,_z,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svfloat16_t test_svminnm_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svminnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svminnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f32,_z,)(pg, op1, op2);
 }
@@ -237,7 +245,7 @@ svfloat32_t test_svminnm_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svminnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svminnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f64,_z,)(pg, op1, op2);
 }
@@ -258,7 +266,7 @@ svfloat64_t test_svminnm_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svminnm_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svminnm_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f16,_m,)(pg, op1, op2);
 }
@@ -279,7 +287,7 @@ svfloat16_t test_svminnm_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svminnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svminnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f32,_m,)(pg, op1, op2);
 }
@@ -300,7 +308,7 @@ svfloat32_t test_svminnm_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svminnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svminnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f64,_m,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svfloat64_t test_svminnm_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svminnm_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svminnm_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f16,_x,)(pg, op1, op2);
 }
@@ -342,7 +350,7 @@ svfloat16_t test_svminnm_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svminnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svminnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f32,_x,)(pg, op1, op2);
 }
@@ -363,7 +371,7 @@ svfloat32_t test_svminnm_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svminnm_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svminnm_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnm,_n_f64,_x,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnmv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnmv.c
index d4bc5fcb71853..58293685b1636 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnmv.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minnmv.c
@@ -5,9 +5,17 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.fminnmv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svminnmv_f16(svbool_t pg, svfloat16_t op)
+float16_t test_svminnmv_f16(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnmv,_f16,,)(pg, op);
 }
@@ -44,7 +52,7 @@ float16_t test_svminnmv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.fminnmv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svminnmv_f32(svbool_t pg, svfloat32_t op)
+float32_t test_svminnmv_f32(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnmv,_f32,,)(pg, op);
 }
@@ -61,7 +69,7 @@ float32_t test_svminnmv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.fminnmv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svminnmv_f64(svbool_t pg, svfloat64_t op)
+float64_t test_svminnmv_f64(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminnmv,_f64,,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minv.c
index e01e50340181c..acfa6a67a0974 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minv.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_minv.c
@@ -5,9 +5,17 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.sminv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_svminv_s8(svbool_t pg, svint8_t op)
+int8_t test_svminv_s8(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_s8,,)(pg, op);
 }
@@ -42,7 +50,7 @@ int8_t test_svminv_s8(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-int16_t test_svminv_s16(svbool_t pg, svint16_t op)
+int16_t test_svminv_s16(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_s16,,)(pg, op);
 }
@@ -59,7 +67,7 @@ int16_t test_svminv_s16(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.sminv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svminv_s32(svbool_t pg, svint32_t op)
+int32_t test_svminv_s32(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_s32,,)(pg, op);
 }
@@ -76,7 +84,7 @@ int32_t test_svminv_s32(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.sminv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svminv_s64(svbool_t pg, svint64_t op)
+int64_t test_svminv_s64(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_s64,,)(pg, op);
 }
@@ -91,7 +99,7 @@ int64_t test_svminv_s64(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.uminv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_svminv_u8(svbool_t pg, svuint8_t op)
+uint8_t test_svminv_u8(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_u8,,)(pg, op);
 }
@@ -108,7 +116,7 @@ uint8_t test_svminv_u8(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-uint16_t test_svminv_u16(svbool_t pg, svuint16_t op)
+uint16_t test_svminv_u16(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_u16,,)(pg, op);
 }
@@ -125,7 +133,7 @@ uint16_t test_svminv_u16(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.uminv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svminv_u32(svbool_t pg, svuint32_t op)
+uint32_t test_svminv_u32(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_u32,,)(pg, op);
 }
@@ -142,7 +150,7 @@ uint32_t test_svminv_u32(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uminv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svminv_u64(svbool_t pg, svuint64_t op)
+uint64_t test_svminv_u64(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_u64,,)(pg, op);
 }
@@ -159,7 +167,7 @@ uint64_t test_svminv_u64(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call half @llvm.aarch64.sve.fminv.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret half [[TMP1]]
 //
-float16_t test_svminv_f16(svbool_t pg, svfloat16_t op)
+float16_t test_svminv_f16(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_f16,,)(pg, op);
 }
@@ -176,7 +184,7 @@ float16_t test_svminv_f16(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call float @llvm.aarch64.sve.fminv.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret float [[TMP1]]
 //
-float32_t test_svminv_f32(svbool_t pg, svfloat32_t op)
+float32_t test_svminv_f32(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_f32,,)(pg, op);
 }
@@ -193,7 +201,7 @@ float32_t test_svminv_f32(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call double @llvm.aarch64.sve.fminv.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret double [[TMP1]]
 //
-float64_t test_svminv_f64(svbool_t pg, svfloat64_t op)
+float64_t test_svminv_f64(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svminv,_f64,,)(pg, op);
 }
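
The new +sme RUN line compiles to /dev/null with no FileCheck prefix, so it is purely a does-this-still-compile check; the emitted IR is only matched under +sve. Each call site also goes through SVE_ACLE_FUNC, which selects between the type-suffixed and the overloaded spelling of an intrinsic. A sketch of the expansion (the non-overloaded branch is not visible in these hunks and is assumed to paste all four tokens; MODE_ATTR omitted for brevity):

#include <arm_sve.h>

// Overloaded branch, as defined in these tests under -DSVE_OVERLOADED_FORMS;
// the other branch (assumed here) would be A1##A2##A3##A4.
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3

// Expands to the overloaded svminv(pg, v); a build without
// -DSVE_OVERLOADED_FORMS calls svminv_s8(pg, v) instead.
int8_t reduce_min(svbool_t pg, svint8_t v)
{
  return SVE_ACLE_FUNC(svminv,_s8,,)(pg, v);
}
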
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c
index 6946c5b472daa..f92216dfbbc5f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mla.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmla_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmla_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s8,_z,)(pg, op1, op2, op3);
 }
@@ -45,7 +53,7 @@ svint8_t test_svmla_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmla_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmla_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s16,_z,)(pg, op1, op2, op3);
 }
@@ -64,7 +72,7 @@ svint16_t test_svmla_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmla_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmla_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s32,_z,)(pg, op1, op2, op3);
 }
@@ -83,7 +91,7 @@ svint32_t test_svmla_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmla_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmla_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s64,_z,)(pg, op1, op2, op3);
 }
@@ -100,7 +108,7 @@ svint64_t test_svmla_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmla_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmla_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u8,_z,)(pg, op1, op2, op3);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svmla_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmla_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmla_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u16,_z,)(pg, op1, op2, op3);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svmla_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmla_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmla_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u32,_z,)(pg, op1, op2, op3);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svmla_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmla_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmla_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u64,_z,)(pg, op1, op2, op3);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svmla_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmla_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmla_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s8,_m,)(pg, op1, op2, op3);
 }
@@ -189,7 +197,7 @@ svint8_t test_svmla_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmla_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmla_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s16,_m,)(pg, op1, op2, op3);
 }
@@ -206,7 +214,7 @@ svint16_t test_svmla_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmla_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmla_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s32,_m,)(pg, op1, op2, op3);
 }
@@ -223,7 +231,7 @@ svint32_t test_svmla_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmla_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmla_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s64,_m,)(pg, op1, op2, op3);
 }
@@ -238,7 +246,7 @@ svint64_t test_svmla_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmla_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmla_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u8,_m,)(pg, op1, op2, op3);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svmla_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmla_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmla_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u16,_m,)(pg, op1, op2, op3);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svmla_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmla_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmla_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u32,_m,)(pg, op1, op2, op3);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svmla_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmla_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmla_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u64,_m,)(pg, op1, op2, op3);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svmla_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmla_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmla_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s8,_x,)(pg, op1, op2, op3);
 }
@@ -321,7 +329,7 @@ svint8_t test_svmla_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmla_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmla_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s16,_x,)(pg, op1, op2, op3);
 }
@@ -338,7 +346,7 @@ svint16_t test_svmla_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmla_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmla_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s32,_x,)(pg, op1, op2, op3);
 }
@@ -355,7 +363,7 @@ svint32_t test_svmla_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmla_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmla_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_s64,_x,)(pg, op1, op2, op3);
 }
@@ -370,7 +378,7 @@ svint64_t test_svmla_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmla_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmla_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u8,_x,)(pg, op1, op2, op3);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svmla_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmla_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmla_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u16,_x,)(pg, op1, op2, op3);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svmla_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmla_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmla_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u32,_x,)(pg, op1, op2, op3);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svmla_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmla_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmla_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_u64,_x,)(pg, op1, op2, op3);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svmla_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmla_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmla_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s8,_z,)(pg, op1, op2, op3);
 }
@@ -465,7 +473,7 @@ svint8_t test_svmla_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmla_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmla_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s16,_z,)(pg, op1, op2, op3);
 }
@@ -488,7 +496,7 @@ svint16_t test_svmla_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmla_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmla_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s32,_z,)(pg, op1, op2, op3);
 }
@@ -511,7 +519,7 @@ svint32_t test_svmla_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmla_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmla_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s64,_z,)(pg, op1, op2, op3);
 }
@@ -532,7 +540,7 @@ svint64_t test_svmla_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmla_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmla_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u8,_z,)(pg, op1, op2, op3);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svmla_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmla_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmla_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u16,_z,)(pg, op1, op2, op3);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svmla_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmla_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmla_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u32,_z,)(pg, op1, op2, op3);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svmla_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmla_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmla_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u64,_z,)(pg, op1, op2, op3);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svmla_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmla_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmla_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s8,_m,)(pg, op1, op2, op3);
 }
@@ -641,7 +649,7 @@ svint8_t test_svmla_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmla_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmla_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s16,_m,)(pg, op1, op2, op3);
 }
@@ -662,7 +670,7 @@ svint16_t test_svmla_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmla_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmla_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s32,_m,)(pg, op1, op2, op3);
 }
@@ -683,7 +691,7 @@ svint32_t test_svmla_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmla_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmla_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s64,_m,)(pg, op1, op2, op3);
 }
@@ -702,7 +710,7 @@ svint64_t test_svmla_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmla_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmla_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u8,_m,)(pg, op1, op2, op3);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svmla_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmla_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmla_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u16,_m,)(pg, op1, op2, op3);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svmla_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmla_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmla_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u32,_m,)(pg, op1, op2, op3);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svmla_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmla_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmla_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u64,_m,)(pg, op1, op2, op3);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svmla_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmla_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmla_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s8,_x,)(pg, op1, op2, op3);
 }
@@ -805,7 +813,7 @@ svint8_t test_svmla_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmla_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmla_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s16,_x,)(pg, op1, op2, op3);
 }
@@ -826,7 +834,7 @@ svint16_t test_svmla_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmla_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmla_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s32,_x,)(pg, op1, op2, op3);
 }
@@ -847,7 +855,7 @@ svint32_t test_svmla_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmla_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmla_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_s64,_x,)(pg, op1, op2, op3);
 }
@@ -866,7 +874,7 @@ svint64_t test_svmla_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmla_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmla_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u8,_x,)(pg, op1, op2, op3);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svmla_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmla_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmla_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u16,_x,)(pg, op1, op2, op3);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svmla_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmla_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmla_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u32,_x,)(pg, op1, op2, op3);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svmla_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmla_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmla_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_u64,_x,)(pg, op1, op2, op3);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svmla_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f16,_z,)(pg, op1, op2, op3);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f32,_z,)(pg, op1, op2, op3);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f64,_z,)(pg, op1, op2, op3);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f16,_m,)(pg, op1, op2, op3);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f32,_m,)(pg, op1, op2, op3);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f64,_m,)(pg, op1, op2, op3);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f16,_x,)(pg, op1, op2, op3);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f32,_x,)(pg, op1, op2, op3);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_f64,_x,)(pg, op1, op2, op3);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmla_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmla_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f16,_z,)(pg, op1, op2, op3);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svmla_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f32,_z,)(pg, op1, op2, op3);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f64,_z,)(pg, op1, op2, op3);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmla_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmla_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f16,_m,)(pg, op1, op2, op3);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svmla_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f32,_m,)(pg, op1, op2, op3);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f64,_m,)(pg, op1, op2, op3);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmla_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmla_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f16,_x,)(pg, op1, op2, op3);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svmla_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f32,_x,)(pg, op1, op2, op3);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla,_n_f64,_x,)(pg, op1, op2, op3);
 }
@@ -1298,7 +1306,7 @@ svfloat64_t test_svmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla_lane,_f16,,)(op1, op2, op3, 0);
 }
@@ -1313,7 +1321,7 @@ svfloat16_t test_svmla_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmla.lane.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla_lane,_f16,,)(op1, op2, op3, 7);
 }
@@ -1328,7 +1336,7 @@ svfloat16_t test_svmla_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla_lane,_f32,,)(op1, op2, op3, 0);
 }
@@ -1343,7 +1351,7 @@ svfloat32_t test_svmla_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmla.lane.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla_lane,_f32,,)(op1, op2, op3, 3);
 }
@@ -1358,7 +1366,7 @@ svfloat32_t test_svmla_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svmla_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmla_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla_lane,_f64,,)(op1, op2, op3, 0);
 }
@@ -1373,7 +1381,7 @@ svfloat64_t test_svmla_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmla.lane.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svmla_lane_f64_1(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmla_lane_f64_1(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmla_lane,_f64,,)(op1, op2, op3, 1);
 }
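
The svmla_lane tests above differ from the predicated _z/_m/_x forms: the lane variants take no governing predicate, and the trailing argument is a lane index that must be an integer constant expression in range for the element width (hence the paired tests probing 0 and the maximum: 7 for f16, 3 for f32, 1 for f64). A standalone sketch (illustrative name; MODE_ATTR omitted for brevity):

#include <arm_sve.h>

// Multiply a by lane 3 of b and accumulate into acc; the index must be a
// constant expression, 0..3 for 32-bit elements.
svfloat32_t mla_by_lane3(svfloat32_t acc, svfloat32_t a, svfloat32_t b)
{
  return svmla_lane_f32(acc, a, b, 3);
}
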
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mls.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mls.c
index 650b844e8ed0c..bbffece5348b3 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mls.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mls.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmls_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmls_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s8,_z,)(pg, op1, op2, op3);
 }
@@ -45,7 +53,7 @@ svint8_t test_svmls_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmls_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmls_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s16,_z,)(pg, op1, op2, op3);
 }
@@ -64,7 +72,7 @@ svint16_t test_svmls_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmls_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmls_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s32,_z,)(pg, op1, op2, op3);
 }
@@ -83,7 +91,7 @@ svint32_t test_svmls_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmls_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmls_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s64,_z,)(pg, op1, op2, op3);
 }
@@ -100,7 +108,7 @@ svint64_t test_svmls_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmls_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmls_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u8,_z,)(pg, op1, op2, op3);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svmls_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmls_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmls_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u16,_z,)(pg, op1, op2, op3);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svmls_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmls_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmls_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u32,_z,)(pg, op1, op2, op3);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svmls_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmls_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmls_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u64,_z,)(pg, op1, op2, op3);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svmls_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmls_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmls_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s8,_m,)(pg, op1, op2, op3);
 }
@@ -189,7 +197,7 @@ svint8_t test_svmls_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmls_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmls_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s16,_m,)(pg, op1, op2, op3);
 }
@@ -206,7 +214,7 @@ svint16_t test_svmls_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmls_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmls_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s32,_m,)(pg, op1, op2, op3);
 }
@@ -223,7 +231,7 @@ svint32_t test_svmls_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmls_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmls_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s64,_m,)(pg, op1, op2, op3);
 }
@@ -238,7 +246,7 @@ svint64_t test_svmls_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmls_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmls_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u8,_m,)(pg, op1, op2, op3);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svmls_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmls_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmls_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u16,_m,)(pg, op1, op2, op3);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svmls_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmls_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmls_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u32,_m,)(pg, op1, op2, op3);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svmls_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmls_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmls_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u64,_m,)(pg, op1, op2, op3);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svmls_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmls_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmls_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s8,_x,)(pg, op1, op2, op3);
 }
@@ -321,7 +329,7 @@ svint8_t test_svmls_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmls_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmls_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s16,_x,)(pg, op1, op2, op3);
 }
@@ -338,7 +346,7 @@ svint16_t test_svmls_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmls_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmls_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s32,_x,)(pg, op1, op2, op3);
 }
@@ -355,7 +363,7 @@ svint32_t test_svmls_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmls_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmls_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_s64,_x,)(pg, op1, op2, op3);
 }
@@ -370,7 +378,7 @@ svint64_t test_svmls_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmls_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmls_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u8,_x,)(pg, op1, op2, op3);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svmls_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmls_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmls_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u16,_x,)(pg, op1, op2, op3);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svmls_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmls_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmls_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u32,_x,)(pg, op1, op2, op3);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svmls_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmls_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmls_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_u64,_x,)(pg, op1, op2, op3);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svmls_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmls_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmls_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s8,_z,)(pg, op1, op2, op3);
 }
@@ -465,7 +473,7 @@ svint8_t test_svmls_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmls_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmls_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s16,_z,)(pg, op1, op2, op3);
 }
@@ -488,7 +496,7 @@ svint16_t test_svmls_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmls_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmls_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s32,_z,)(pg, op1, op2, op3);
 }
@@ -511,7 +519,7 @@ svint32_t test_svmls_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmls_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmls_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s64,_z,)(pg, op1, op2, op3);
 }
@@ -532,7 +540,7 @@ svint64_t test_svmls_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmls_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmls_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u8,_z,)(pg, op1, op2, op3);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svmls_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmls_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmls_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u16,_z,)(pg, op1, op2, op3);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svmls_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmls_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmls_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u32,_z,)(pg, op1, op2, op3);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svmls_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmls_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmls_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u64,_z,)(pg, op1, op2, op3);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svmls_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmls_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmls_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s8,_m,)(pg, op1, op2, op3);
 }
@@ -641,7 +649,7 @@ svint8_t test_svmls_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmls_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmls_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s16,_m,)(pg, op1, op2, op3);
 }
@@ -662,7 +670,7 @@ svint16_t test_svmls_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmls_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmls_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s32,_m,)(pg, op1, op2, op3);
 }
@@ -683,7 +691,7 @@ svint32_t test_svmls_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmls_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmls_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s64,_m,)(pg, op1, op2, op3);
 }
@@ -702,7 +710,7 @@ svint64_t test_svmls_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmls_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmls_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u8,_m,)(pg, op1, op2, op3);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svmls_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmls_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmls_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u16,_m,)(pg, op1, op2, op3);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svmls_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmls_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmls_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u32,_m,)(pg, op1, op2, op3);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svmls_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmls_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmls_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u64,_m,)(pg, op1, op2, op3);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svmls_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmls_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmls_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s8,_x,)(pg, op1, op2, op3);
 }
@@ -805,7 +813,7 @@ svint8_t test_svmls_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmls_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmls_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s16,_x,)(pg, op1, op2, op3);
 }
@@ -826,7 +834,7 @@ svint16_t test_svmls_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmls_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmls_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s32,_x,)(pg, op1, op2, op3);
 }
@@ -847,7 +855,7 @@ svint32_t test_svmls_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmls_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmls_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_s64,_x,)(pg, op1, op2, op3);
 }
@@ -866,7 +874,7 @@ svint64_t test_svmls_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmls_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmls_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u8,_x,)(pg, op1, op2, op3);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svmls_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmls_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmls_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u16,_x,)(pg, op1, op2, op3);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svmls_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmls_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmls_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u32,_x,)(pg, op1, op2, op3);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svmls_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmls_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmls_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_u64,_x,)(pg, op1, op2, op3);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svmls_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f16,_z,)(pg, op1, op2, op3);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f32,_z,)(pg, op1, op2, op3);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f64,_z,)(pg, op1, op2, op3);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f16,_m,)(pg, op1, op2, op3);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f32,_m,)(pg, op1, op2, op3);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f64,_m,)(pg, op1, op2, op3);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f16,_x,)(pg, op1, op2, op3);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f32,_x,)(pg, op1, op2, op3);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_f64,_x,)(pg, op1, op2, op3);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmls_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmls_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f16,_z,)(pg, op1, op2, op3);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svmls_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f32,_z,)(pg, op1, op2, op3);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f64,_z,)(pg, op1, op2, op3);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmls_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmls_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f16,_m,)(pg, op1, op2, op3);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svmls_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f32,_m,)(pg, op1, op2, op3);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f64,_m,)(pg, op1, op2, op3);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmls_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmls_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f16,_x,)(pg, op1, op2, op3);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svmls_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f32,_x,)(pg, op1, op2, op3);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls,_n_f64,_x,)(pg, op1, op2, op3);
 }
@@ -1298,7 +1306,7 @@ svfloat64_t test_svmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svmls_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmls_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls_lane,_f16,,)(op1, op2, op3, 0);
 }
@@ -1313,7 +1321,7 @@ svfloat16_t test_svmls_lane_f16(svfloat16_t op1, svfloat16_t op2, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.lane.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svmls_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmls_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls_lane,_f16,,)(op1, op2, op3, 7);
 }
@@ -1328,7 +1336,7 @@ svfloat16_t test_svmls_lane_f16_1(svfloat16_t op1, svfloat16_t op2, svfloat16_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svmls_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmls_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls_lane,_f32,,)(op1, op2, op3, 0);
 }
@@ -1343,7 +1351,7 @@ svfloat32_t test_svmls_lane_f32(svfloat32_t op1, svfloat32_t op2, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.lane.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svmls_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmls_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls_lane,_f32,,)(op1, op2, op3, 3);
 }
@@ -1358,7 +1366,7 @@ svfloat32_t test_svmls_lane_f32_1(svfloat32_t op1, svfloat32_t op2, svfloat32_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svmls_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmls_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls_lane,_f64,,)(op1, op2, op3, 0);
 }
@@ -1373,7 +1381,7 @@ svfloat64_t test_svmls_lane_f64(svfloat64_t op1, svfloat64_t op2, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.lane.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svmls_lane_f64_1(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmls_lane_f64_1(svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmls_lane,_f64,,)(op1, op2, op3, 1);
 }
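
The lane-indexed tests just above probe the maximum legal immediates per element type: 7 for f16, 3 for f32, 1 for f64. fmls.lane selects a lane within a 128-bit quadword, so the valid index range is 0 .. (128 / element-bits) - 1. A hypothetical illustration, not part of the patch:

// A 128-bit quadword holds 8 half, 4 float, or 2 double lanes, so the last
// legal lane index is 7, 3, and 1 respectively; e.g.
// svmls_lane_f32(op1, op2, op3, 3) is accepted, while index 4 would be
// rejected as out of range for a 32-bit element.
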
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c
index 79e68be49b2b3..18c5516a24dac 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mov.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.and.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]], <vscale x 16 x i1> [[OP]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svmov_b_z(svbool_t pg, svbool_t op)
+svbool_t test_svmov_b_z(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmov,_b,_z,)(pg, op);
 }
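
The CHECK lines for test_svmov_b_z above show that a predicate "mov" has no intrinsic of its own: it is emitted as a predicated AND of the operand with itself. A hypothetical equivalent, not part of the patch:

// svmov_b_z(pg, op) == svand_b_z(pg, op, op): (op & op) == op, and the _z
// form clears lanes where pg is false, hence the @llvm.aarch64.sve.and.z
// call in the IR above.
svbool_t mov_via_and(svbool_t pg, svbool_t op) MODE_ATTR
{
  return svand_b_z(pg, op, op);
}
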
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_msb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_msb.c
index 888b8331b6b50..ea2c59ff799fa 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_msb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_msb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmsb_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmsb_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s8,_z,)(pg, op1, op2, op3);
 }
@@ -45,7 +53,7 @@ svint8_t test_svmsb_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmsb_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmsb_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s16,_z,)(pg, op1, op2, op3);
 }
@@ -64,7 +72,7 @@ svint16_t test_svmsb_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmsb_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmsb_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s32,_z,)(pg, op1, op2, op3);
 }
@@ -83,7 +91,7 @@ svint32_t test_svmsb_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmsb_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmsb_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s64,_z,)(pg, op1, op2, op3);
 }
@@ -100,7 +108,7 @@ svint64_t test_svmsb_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmsb_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmsb_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u8,_z,)(pg, op1, op2, op3);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svmsb_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmsb_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmsb_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u16,_z,)(pg, op1, op2, op3);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svmsb_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmsb_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmsb_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u32,_z,)(pg, op1, op2, op3);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svmsb_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmsb_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmsb_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u64,_z,)(pg, op1, op2, op3);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svmsb_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmsb_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmsb_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s8,_m,)(pg, op1, op2, op3);
 }
@@ -189,7 +197,7 @@ svint8_t test_svmsb_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmsb_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmsb_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s16,_m,)(pg, op1, op2, op3);
 }
@@ -206,7 +214,7 @@ svint16_t test_svmsb_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmsb_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmsb_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s32,_m,)(pg, op1, op2, op3);
 }
@@ -223,7 +231,7 @@ svint32_t test_svmsb_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmsb_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmsb_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s64,_m,)(pg, op1, op2, op3);
 }
@@ -238,7 +246,7 @@ svint64_t test_svmsb_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmsb_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmsb_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u8,_m,)(pg, op1, op2, op3);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svmsb_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmsb_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmsb_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u16,_m,)(pg, op1, op2, op3);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svmsb_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmsb_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmsb_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u32,_m,)(pg, op1, op2, op3);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svmsb_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmsb_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmsb_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u64,_m,)(pg, op1, op2, op3);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svmsb_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmsb_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
+svint8_t test_svmsb_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s8,_x,)(pg, op1, op2, op3);
 }
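
Note the operand order in the _x CHECK lines above: svmsb carries no intrinsic of its own either and is canonicalized to the same @llvm.aarch64.sve.mls.u intrinsic with its operands reordered, which is why the IR reads [[OP3]], [[OP2]], [[OP1]]. A sketch of the identity, not part of the patch:

// svmsb_x(pg, op1, op2, op3) computes op3 - op1 * op2 per element, while
// svmls_x(pg, acc, a, b) computes acc - a * b; multiplication commutes, so
// msb(op1, op2, op3) == mls(op3, op2, op1), matching the emitted call.
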
@@ -321,7 +329,7 @@ svint8_t test_svmsb_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, svint8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP3:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmsb_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3)
+svint16_t test_svmsb_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s16,_x,)(pg, op1, op2, op3);
 }
@@ -338,7 +346,7 @@ svint16_t test_svmsb_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, svint16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP3:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmsb_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3)
+svint32_t test_svmsb_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s32,_x,)(pg, op1, op2, op3);
 }
@@ -355,7 +363,7 @@ svint32_t test_svmsb_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, svint32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP3:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmsb_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3)
+svint64_t test_svmsb_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_s64,_x,)(pg, op1, op2, op3);
 }
@@ -370,7 +378,7 @@ svint64_t test_svmsb_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, svint64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP3:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmsb_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3)
+svuint8_t test_svmsb_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u8,_x,)(pg, op1, op2, op3);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svmsb_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, svuint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP3:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmsb_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3)
+svuint16_t test_svmsb_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u16,_x,)(pg, op1, op2, op3);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svmsb_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, svuint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP3:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmsb_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3)
+svuint32_t test_svmsb_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u32,_x,)(pg, op1, op2, op3);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svmsb_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, svuint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP3:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmsb_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3)
+svuint64_t test_svmsb_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_u64,_x,)(pg, op1, op2, op3);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svmsb_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, svuint6
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmsb_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmsb_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s8,_z,)(pg, op1, op2, op3);
 }
@@ -465,7 +473,7 @@ svint8_t test_svmsb_n_s8_z(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmsb_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmsb_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s16,_z,)(pg, op1, op2, op3);
 }
@@ -488,7 +496,7 @@ svint16_t test_svmsb_n_s16_z(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmsb_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmsb_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s32,_z,)(pg, op1, op2, op3);
 }
@@ -511,7 +519,7 @@ svint32_t test_svmsb_n_s32_z(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmsb_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmsb_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s64,_z,)(pg, op1, op2, op3);
 }
@@ -532,7 +540,7 @@ svint64_t test_svmsb_n_s64_z(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmsb_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmsb_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u8,_z,)(pg, op1, op2, op3);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svmsb_n_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmsb_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmsb_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u16,_z,)(pg, op1, op2, op3);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svmsb_n_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmsb_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmsb_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u32,_z,)(pg, op1, op2, op3);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svmsb_n_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmsb_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmsb_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u64,_z,)(pg, op1, op2, op3);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svmsb_n_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmsb_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmsb_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s8,_m,)(pg, op1, op2, op3);
 }
@@ -641,7 +649,7 @@ svint8_t test_svmsb_n_s8_m(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmsb_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmsb_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s16,_m,)(pg, op1, op2, op3);
 }
@@ -662,7 +670,7 @@ svint16_t test_svmsb_n_s16_m(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmsb_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmsb_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s32,_m,)(pg, op1, op2, op3);
 }
@@ -683,7 +691,7 @@ svint32_t test_svmsb_n_s32_m(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmsb_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmsb_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s64,_m,)(pg, op1, op2, op3);
 }
@@ -702,7 +710,7 @@ svint64_t test_svmsb_n_s64_m(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.msb.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmsb_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmsb_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u8,_m,)(pg, op1, op2, op3);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svmsb_n_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.msb.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmsb_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmsb_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u16,_m,)(pg, op1, op2, op3);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svmsb_n_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmsb_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmsb_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u32,_m,)(pg, op1, op2, op3);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svmsb_n_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.msb.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmsb_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmsb_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u64,_m,)(pg, op1, op2, op3);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svmsb_n_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmsb_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
+svint8_t test_svmsb_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s8,_x,)(pg, op1, op2, op3);
 }
@@ -805,7 +813,7 @@ svint8_t test_svmsb_n_s8_x(svbool_t pg, svint8_t op1, svint8_t op2, int8_t op3)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmsb_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3)
+svint16_t test_svmsb_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s16,_x,)(pg, op1, op2, op3);
 }
@@ -826,7 +834,7 @@ svint16_t test_svmsb_n_s16_x(svbool_t pg, svint16_t op1, svint16_t op2, int16_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmsb_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3)
+svint32_t test_svmsb_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s32,_x,)(pg, op1, op2, op3);
 }
@@ -847,7 +855,7 @@ svint32_t test_svmsb_n_s32_x(svbool_t pg, svint32_t op1, svint32_t op2, int32_t
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmsb_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3)
+svint64_t test_svmsb_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_s64,_x,)(pg, op1, op2, op3);
 }
@@ -866,7 +874,7 @@ svint64_t test_svmsb_n_s64_x(svbool_t pg, svint64_t op1, svint64_t op2, int64_t
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmsb_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3)
+svuint8_t test_svmsb_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u8,_x,)(pg, op1, op2, op3);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svmsb_n_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2, uint8_t o
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmsb_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3)
+svuint16_t test_svmsb_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u16,_x,)(pg, op1, op2, op3);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svmsb_n_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2, uint1
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmsb_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3)
+svuint32_t test_svmsb_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u32,_x,)(pg, op1, op2, op3);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svmsb_n_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2, uint3
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmsb_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3)
+svuint64_t test_svmsb_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_u64,_x,)(pg, op1, op2, op3);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svmsb_n_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2, uint6
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f16,_z,)(pg, op1, op2, op3);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f32,_z,)(pg, op1, op2, op3);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f64,_z,)(pg, op1, op2, op3);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f16,_m,)(pg, op1, op2, op3);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f32,_m,)(pg, op1, op2, op3);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f64,_m,)(pg, op1, op2, op3);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP3:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f16,_x,)(pg, op1, op2, op3);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP3:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f32,_x,)(pg, op1, op2, op3);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP3:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_f64,_x,)(pg, op1, op2, op3);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmsb_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmsb_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f16,_z,)(pg, op1, op2, op3);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svmsb_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f32,_z,)(pg, op1, op2, op3);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f64,_z,)(pg, op1, op2, op3);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmsb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmsb_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmsb_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f16,_m,)(pg, op1, op2, op3);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svmsb_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f32,_m,)(pg, op1, op2, op3);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmsb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f64,_m,)(pg, op1, op2, op3);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[DOTSPLAT]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmsb_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svmsb_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f16,_x,)(pg, op1, op2, op3);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svmsb_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[DOTSPLAT]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f32,_x,)(pg, op1, op2, op3);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, fl
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[DOTSPLAT]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmsb_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svmsb_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmsb,_n_f64,_x,)(pg, op1, op2, op3);
 }
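
As a reading aid for the msb hunks above: the tests rely on the ACLE semantics that svmsb(pg, op1, op2, op3) computes op3 - op1 * op2 per active lane, which is why the unpredicated _x tests CHECK for the reversed llvm.aarch64.sve.mls.u intrinsic with operands (pg, op3, op2, op1). A minimal sketch of that equivalence, using only documented ACLE intrinsics (the function name is illustrative, not part of the patch):

#include <arm_sve.h>

// svmls(pg, a, b, c) computes a - b * c, so reordering the msb
// operands gives the same op3 - op1 * op2 result the tests check.
svint32_t msb_via_mls(svbool_t pg, svint32_t op1, svint32_t op2,
                      svint32_t op3) {
  return svmls_s32_x(pg, op3, op2, op1);
}
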
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mul.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mul.c
index f58f78eacb980..e122b57d532dd 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mul.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mul.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used, unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
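
The macro block above is what makes each test dual-mode: the new +sme RUN line defines __ARM_FEATURE_SME, so MODE_ATTR expands to __arm_streaming and the whole file is compiled as streaming code, while the existing +sve RUN lines leave MODE_ATTR empty. A sketch of what one of the tests below preprocesses to in each configuration (overloaded form shown; illustrative, not verbatim compiler output):

// +sve RUN lines: MODE_ATTR is empty, a plain SVE function.
svint8_t test_svmul_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
{
  return svmul_z(pg, op1, op2);
}

// +sme RUN line: __ARM_FEATURE_SME is defined, so the same source
// becomes a streaming function, the mode in which these SVE
// intrinsics are now accepted without +sve.
svint8_t test_svmul_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) __arm_streaming
{
  return svmul_z(pg, op1, op2);
}
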
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmul_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmul_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svmul_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmul_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmul_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svmul_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmul_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmul_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svmul_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmul_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmul_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svmul_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmul_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmul_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svmul_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmul_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmul_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svmul_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmul_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmul_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svmul_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmul_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmul_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svmul_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmul_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmul_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svmul_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmul_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmul_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svmul_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmul_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmul_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svmul_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmul_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmul_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svmul_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmul_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmul_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svmul_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmul_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmul_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svmul_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmul_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmul_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svmul_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmul_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmul_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svmul_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmul_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmul_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svmul_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmul_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmul_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svmul_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmul_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmul_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svmul_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmul_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmul_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svmul_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmul_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmul_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svmul_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmul_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmul_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svmul_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmul_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmul_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svmul_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmul_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmul_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_u64,_x,)(pg, op1, op2);
 }
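
The _n hunks that follow pass a scalar as the last operand; the [[DOTSPLAT]] values in their CHECK lines are that scalar broadcast to a full vector before the same intrinsic is invoked. A hedged sketch of the equivalence the tests rely on (svdup is used here for illustration; the compiler emits the splat directly in IR):

#include <arm_sve.h>

// svmul_n_s32_x(pg, op1, s) behaves as svmul_s32_x with s splatted
// across all lanes.
svint32_t mul_n_reference(svbool_t pg, svint32_t op1, int32_t s) {
  return svmul_s32_x(pg, op1, svdup_n_s32(s));
}
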
@@ -442,7 +450,7 @@ svuint64_t test_svmul_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmul_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmul_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svmul_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmul_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmul_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svmul_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmul_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmul_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svmul_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmul_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmul_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svmul_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmul_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmul_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svmul_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmul_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmul_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svmul_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmul_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmul_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svmul_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmul_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmul_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svmul_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmul_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmul_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svmul_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmul_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmul_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svmul_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmul_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmul_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svmul_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmul_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmul_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svmul_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmul_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmul_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svmul_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmul_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmul_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svmul_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmul_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmul_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svmul_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmul_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmul_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svmul_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmul_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmul_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svmul_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmul_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmul_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svmul_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmul_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmul_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svmul_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmul_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmul_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svmul_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmul_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmul_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svmul_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmul_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmul_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svmul_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmul_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmul_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svmul_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmul_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmul_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_u64,_x,)(pg, op1, op2);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svmul_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmul_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmul_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f16,_z,)(pg, op1, op2);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svmul_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmul_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmul_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f32,_z,)(pg, op1, op2);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svmul_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmul_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmul_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f64,_z,)(pg, op1, op2);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svmul_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmul_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmul_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f16,_m,)(pg, op1, op2);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svmul_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmul_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmul_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f32,_m,)(pg, op1, op2);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svmul_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmul_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmul_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f64,_m,)(pg, op1, op2);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svmul_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmul_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmul_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f16,_x,)(pg, op1, op2);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svmul_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmul_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmul_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f32,_x,)(pg, op1, op2);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svmul_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmul_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmul_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_f64,_x,)(pg, op1, op2);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svmul_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmul_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmul_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f16,_z,)(pg, op1, op2);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svmul_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmul_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmul_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f32,_z,)(pg, op1, op2);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svmul_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmul_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmul_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f64,_z,)(pg, op1, op2);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svmul_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmul_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmul_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f16,_m,)(pg, op1, op2);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svmul_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmul_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmul_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f32,_m,)(pg, op1, op2);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svmul_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmul_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmul_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f64,_m,)(pg, op1, op2);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svmul_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmul_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmul_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f16,_x,)(pg, op1, op2);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svmul_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmul_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmul_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f32,_x,)(pg, op1, op2);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svmul_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmul_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmul_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul,_n_f64,_x,)(pg, op1, op2);
 }
@@ -1298,7 +1306,7 @@ svfloat64_t test_svmul_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svmul_lane_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmul_lane_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul_lane,_f16,,)(op1, op2, 0);
 }
@@ -1313,7 +1321,7 @@ svfloat16_t test_svmul_lane_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmul.lane.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svmul_lane_f16_1(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmul_lane_f16_1(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul_lane,_f16,,)(op1, op2, 7);
 }
@@ -1328,7 +1336,7 @@ svfloat16_t test_svmul_lane_f16_1(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svmul_lane_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmul_lane_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul_lane,_f32,,)(op1, op2, 0);
 }
@@ -1343,7 +1351,7 @@ svfloat32_t test_svmul_lane_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmul.lane.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svmul_lane_f32_1(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmul_lane_f32_1(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul_lane,_f32,,)(op1, op2, 3);
 }
@@ -1358,7 +1366,7 @@ svfloat32_t test_svmul_lane_f32_1(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svmul_lane_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmul_lane_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul_lane,_f64,,)(op1, op2, 0);
 }
@@ -1373,7 +1381,7 @@ svfloat64_t test_svmul_lane_f64(svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmul.lane.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svmul_lane_f64_1(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmul_lane_f64_1(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmul_lane,_f64,,)(op1, op2, 1);
 }
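
The hunks above all apply the same mechanical change: each test function gains a trailing MODE_ATTR, which the new preamble leaves empty for the +sve runs and expands to __arm_streaming for the new +sme run. A minimal standalone sketch of that pattern, with an invented function name scale() that is not part of the patch:

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
#endif

/* With +sve this compiles as an ordinary function; with +sme,+nosve the
   attribute makes it a streaming function, which is what allows the
   shared SVE intrinsic to be used at all. */
svint32_t scale(svbool_t pg, svint32_t v, int32_t s) MODE_ATTR {
  return svmul_n_s32_x(pg, v, s);
}
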
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulh.c
index 6698fdfca836e..898911c840197 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulh.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmulh_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmulh_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svmulh_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmulh_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmulh_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svmulh_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmulh_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmulh_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svmulh_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmulh_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmulh_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svmulh_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmulh_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmulh_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svmulh_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmulh_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmulh_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svmulh_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmulh_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmulh_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svmulh_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmulh_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmulh_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svmulh_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmulh_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmulh_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svmulh_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmulh_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmulh_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svmulh_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmulh_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmulh_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svmulh_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmulh_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmulh_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svmulh_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmulh_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmulh_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svmulh_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmulh_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmulh_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svmulh_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmulh_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmulh_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svmulh_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmulh_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmulh_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svmulh_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmulh_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svmulh_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svmulh_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmulh_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svmulh_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svmulh_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmulh_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svmulh_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svmulh_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmulh_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svmulh_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svmulh_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmulh_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svmulh_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svmulh_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmulh_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svmulh_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svmulh_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmulh_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svmulh_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svmulh_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmulh_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svmulh_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svmulh_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svmulh_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmulh_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svmulh_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svmulh_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmulh_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svmulh_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svmulh_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmulh_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svmulh_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svmulh_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmulh_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svmulh_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svmulh_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmulh_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svmulh_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svmulh_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmulh_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svmulh_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svmulh_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmulh_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svmulh_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svmulh_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmulh_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svmulh_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmulh_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmulh_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svmulh_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmulh_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmulh_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svmulh_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmulh_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmulh_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svmulh_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmulh_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmulh_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svmulh_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmulh_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmulh_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svmulh_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmulh_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmulh_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svmulh_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmulh_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmulh_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svmulh_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmulh_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmulh_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svmulh_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svmulh_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svmulh_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svmulh_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svmulh_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svmulh_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svmulh_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svmulh_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svmulh_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svmulh_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svmulh_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svmulh_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svmulh_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svmulh_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svmulh_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svmulh_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svmulh_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svmulh_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svmulh_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svmulh_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svmulh_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svmulh_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svmulh_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svmulh_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulh,_n_u64,_x,)(pg, op1, op2);
 }
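
Only the SVE_OVERLOADED_FORMS branch of SVE_ACLE_FUNC appears as context in these hunks; the other branch pastes all four arguments together. Both branches are sketched below, with the untaken one reconstructed from the usual convention in these tests (treat it as an assumption, not a quote from the patch):

#ifdef SVE_OVERLOADED_FORMS
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
#endif

/* SVE_ACLE_FUNC(svmulh,_s8,_z,) thus expands to the overloaded svmulh_z
   when SVE_OVERLOADED_FORMS is defined, and to svmulh_s8_z otherwise,
   so a single source file exercises both naming forms under the same
   CHECK lines. */
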
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulx.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulx.c
index e615b3eab17cd..e7b9e62436709 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulx.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_mulx.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmulx_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmulx_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f16,_z,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svmulx_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmulx_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmulx_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f32,_z,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svfloat32_t test_svmulx_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmulx_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmulx_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f64,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svfloat64_t test_svmulx_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmulx_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmulx_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f16,_m,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svfloat16_t test_svmulx_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmulx_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmulx_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f32,_m,)(pg, op1, op2);
 }
@@ -117,7 +125,7 @@ svfloat32_t test_svmulx_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmulx_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmulx_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f64,_m,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svfloat64_t test_svmulx_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmulx_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svmulx_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f16,_x,)(pg, op1, op2);
 }
@@ -151,7 +159,7 @@ svfloat16_t test_svmulx_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmulx_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svmulx_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f32,_x,)(pg, op1, op2);
 }
@@ -168,7 +176,7 @@ svfloat32_t test_svmulx_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmulx_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svmulx_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_f64,_x,)(pg, op1, op2);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svmulx_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svmulx_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmulx_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f16,_z,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svfloat16_t test_svmulx_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svmulx_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmulx_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f32,_z,)(pg, op1, op2);
 }
@@ -237,7 +245,7 @@ svfloat32_t test_svmulx_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svmulx_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmulx_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f64,_z,)(pg, op1, op2);
 }
@@ -258,7 +266,7 @@ svfloat64_t test_svmulx_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmulx_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmulx_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f16,_m,)(pg, op1, op2);
 }
@@ -279,7 +287,7 @@ svfloat16_t test_svmulx_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmulx_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmulx_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f32,_m,)(pg, op1, op2);
 }
@@ -300,7 +308,7 @@ svfloat32_t test_svmulx_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmulx_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmulx_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f64,_m,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svfloat64_t test_svmulx_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svmulx_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svmulx_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f16,_x,)(pg, op1, op2);
 }
@@ -342,7 +350,7 @@ svfloat16_t test_svmulx_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svmulx_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svmulx_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f32,_x,)(pg, op1, op2);
 }
@@ -363,7 +371,7 @@ svfloat32_t test_svmulx_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svmulx_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svmulx_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svmulx,_n_f64,_x,)(pg, op1, op2);
 }
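
For context on why the attribute is needed at all: intrinsics tagged VerifyRuntimeMode are accepted in a non-streaming function when SVE is available, or in a streaming function when SME is. A rough sketch of the failing and passing cases under +sme,+nosve follows; the function names bad() and good() are invented for illustration, and the exact diagnostic wording is the compiler's, not reproduced here:

#include <arm_sve.h>

/* Diagnosed with +sme,+nosve: a non-streaming function has no legal
   way to execute an SVE intrinsic when only SME is available. */
svfloat32_t bad(svbool_t pg, svfloat32_t a, svfloat32_t b) {
  return svmulx_f32_x(pg, a, b);
}

/* Accepted: __arm_streaming puts the body in streaming mode, where
   +sme alone provides the vector facilities. */
svfloat32_t good(svbool_t pg, svfloat32_t a, svfloat32_t b) __arm_streaming {
  return svmulx_f32_x(pg, a, b);
}
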
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nand.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nand.c
index 26e29149df2c6..5b8bede05af7d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nand.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nand.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nand.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svnand_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svnand_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnand,_b,_z,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_neg.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_neg.c
index f591b84930d4a..64b4d68951102 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_neg.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_neg.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svneg_s8_z(svbool_t pg, svint8_t op)
+svint8_t test_svneg_s8_z(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s8,_z,)(pg, op);
 }
@@ -41,7 +49,7 @@ svint8_t test_svneg_s8_z(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svneg_s16_z(svbool_t pg, svint16_t op)
+svint16_t test_svneg_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s16,_z,)(pg, op);
 }
@@ -58,7 +66,7 @@ svint16_t test_svneg_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svneg_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svneg_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s32,_z,)(pg, op);
 }
@@ -75,7 +83,7 @@ svint32_t test_svneg_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svneg_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svneg_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s64,_z,)(pg, op);
 }
@@ -90,7 +98,7 @@ svint64_t test_svneg_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svneg_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
+svint8_t test_svneg_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s8,_m,)(inactive, pg, op);
 }
@@ -107,7 +115,7 @@ svint8_t test_svneg_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svneg_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
+svint16_t test_svneg_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s16,_m,)(inactive, pg, op);
 }
@@ -124,7 +132,7 @@ svint16_t test_svneg_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svneg_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svneg_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s32,_m,)(inactive, pg, op);
 }
@@ -141,7 +149,7 @@ svint32_t test_svneg_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svneg_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svneg_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s64,_m,)(inactive, pg, op);
 }
@@ -156,7 +164,7 @@ svint64_t test_svneg_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.neg.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svneg_s8_x(svbool_t pg, svint8_t op)
+svint8_t test_svneg_s8_x(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s8,_x,)(pg, op);
 }
@@ -173,7 +181,7 @@ svint8_t test_svneg_s8_x(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.neg.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svneg_s16_x(svbool_t pg, svint16_t op)
+svint16_t test_svneg_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s16,_x,)(pg, op);
 }
@@ -190,7 +198,7 @@ svint16_t test_svneg_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svneg_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svneg_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s32,_x,)(pg, op);
 }
@@ -207,7 +215,7 @@ svint32_t test_svneg_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svneg_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svneg_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_s64,_x,)(pg, op);
 }
@@ -224,7 +232,7 @@ svint64_t test_svneg_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svneg_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svneg_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f16,_z,)(pg, op);
 }
@@ -241,7 +249,7 @@ svfloat16_t test_svneg_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svneg_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svneg_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f32,_z,)(pg, op);
 }
@@ -258,7 +266,7 @@ svfloat32_t test_svneg_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svneg_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svneg_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f64,_z,)(pg, op);
 }
@@ -275,7 +283,7 @@ svfloat64_t test_svneg_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svneg_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svneg_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f16,_m,)(inactive, pg, op);
 }
@@ -292,7 +300,7 @@ svfloat16_t test_svneg_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svneg_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svneg_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f32,_m,)(inactive, pg, op);
 }
@@ -309,7 +317,7 @@ svfloat32_t test_svneg_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svneg_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svneg_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f64,_m,)(inactive, pg, op);
 }
@@ -326,7 +334,7 @@ svfloat64_t test_svneg_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fneg.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svneg_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svneg_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f16,_x,)(pg, op);
 }
@@ -343,7 +351,7 @@ svfloat16_t test_svneg_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svneg_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svneg_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f32,_x,)(pg, op);
 }
@@ -360,7 +368,7 @@ svfloat32_t test_svneg_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svneg_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svneg_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svneg,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmad.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmad.c
index 1a3edeae057c8..e56c554f385e4 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmad.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmad.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svnmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f16,_z,)(pg, op1, op2, op3);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svnmad_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svnmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f32,_z,)(pg, op1, op2, op3);
 }
@@ -66,7 +74,7 @@ svfloat32_t test_svnmad_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svnmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f64,_z,)(pg, op1, op2, op3);
 }
@@ -83,7 +91,7 @@ svfloat64_t test_svnmad_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f16,_m,)(pg, op1, op2, op3);
 }
@@ -100,7 +108,7 @@ svfloat16_t test_svnmad_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f32,_m,)(pg, op1, op2, op3);
 }
@@ -117,7 +125,7 @@ svfloat32_t test_svnmad_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f64,_m,)(pg, op1, op2, op3);
 }
@@ -134,7 +142,7 @@ svfloat64_t test_svnmad_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP3:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f16,_x,)(pg, op1, op2, op3);
 }
@@ -151,7 +159,7 @@ svfloat16_t test_svnmad_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP3:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f32,_x,)(pg, op1, op2, op3);
 }
@@ -168,7 +176,7 @@ svfloat32_t test_svnmad_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP3:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_f64,_x,)(pg, op1, op2, op3);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svnmad_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svnmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f16,_z,)(pg, op1, op2, op3);
 }
@@ -214,7 +222,7 @@ svfloat16_t test_svnmad_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svnmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f32,_z,)(pg, op1, op2, op3);
 }
@@ -237,7 +245,7 @@ svfloat32_t test_svnmad_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svnmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f64,_z,)(pg, op1, op2, op3);
 }
@@ -258,7 +266,7 @@ svfloat64_t test_svnmad_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmad.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f16,_m,)(pg, op1, op2, op3);
 }
@@ -279,7 +287,7 @@ svfloat16_t test_svnmad_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f32,_m,)(pg, op1, op2, op3);
 }
@@ -300,7 +308,7 @@ svfloat32_t test_svnmad_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmad.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f64,_m,)(pg, op1, op2, op3);
 }
@@ -321,7 +329,7 @@ svfloat64_t test_svnmad_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[DOTSPLAT]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f16,_x,)(pg, op1, op2, op3);
 }
@@ -342,7 +350,7 @@ svfloat16_t test_svnmad_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[DOTSPLAT]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f32,_x,)(pg, op1, op2, op3);
 }
@@ -363,7 +371,7 @@ svfloat32_t test_svnmad_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[DOTSPLAT]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmad_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmad,_n_f64,_x,)(pg, op1, op2, op3);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmla.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmla.c
index 9545ec736f0df..a6d5a0a2039d9 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmla.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmla.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svnmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f16,_z,)(pg, op1, op2, op3);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svnmla_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svnmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f32,_z,)(pg, op1, op2, op3);
 }
@@ -66,7 +74,7 @@ svfloat32_t test_svnmla_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svnmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f64,_z,)(pg, op1, op2, op3);
 }
@@ -83,7 +91,7 @@ svfloat64_t test_svnmla_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f16,_m,)(pg, op1, op2, op3);
 }
@@ -100,7 +108,7 @@ svfloat16_t test_svnmla_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f32,_m,)(pg, op1, op2, op3);
 }
@@ -117,7 +125,7 @@ svfloat32_t test_svnmla_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f64,_m,)(pg, op1, op2, op3);
 }
@@ -134,7 +142,7 @@ svfloat64_t test_svnmla_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f16,_x,)(pg, op1, op2, op3);
 }
@@ -151,7 +159,7 @@ svfloat16_t test_svnmla_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f32,_x,)(pg, op1, op2, op3);
 }
@@ -168,7 +176,7 @@ svfloat32_t test_svnmla_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_f64,_x,)(pg, op1, op2, op3);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svnmla_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svnmla_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmla_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f16,_z,)(pg, op1, op2, op3);
 }
@@ -214,7 +222,7 @@ svfloat16_t test_svnmla_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svnmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f32,_z,)(pg, op1, op2, op3);
 }
@@ -237,7 +245,7 @@ svfloat32_t test_svnmla_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svnmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f64,_z,)(pg, op1, op2, op3);
 }
@@ -258,7 +266,7 @@ svfloat64_t test_svnmla_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmla_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmla_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f16,_m,)(pg, op1, op2, op3);
 }
@@ -279,7 +287,7 @@ svfloat16_t test_svnmla_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f32,_m,)(pg, op1, op2, op3);
 }
@@ -300,7 +308,7 @@ svfloat32_t test_svnmla_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f64,_m,)(pg, op1, op2, op3);
 }
@@ -321,7 +329,7 @@ svfloat64_t test_svnmla_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmla_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmla_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f16,_x,)(pg, op1, op2, op3);
 }
@@ -342,7 +350,7 @@ svfloat16_t test_svnmla_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f32,_x,)(pg, op1, op2, op3);
 }
@@ -363,7 +371,7 @@ svfloat32_t test_svnmla_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmla_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmla,_n_f64,_x,)(pg, op1, op2, op3);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmls.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmls.c
index 12d564e04feb3..d0f12cfc59f16 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmls.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmls.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svnmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f16,_z,)(pg, op1, op2, op3);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svnmls_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svnmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f32,_z,)(pg, op1, op2, op3);
 }
@@ -66,7 +74,7 @@ svfloat32_t test_svnmls_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svnmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f64,_z,)(pg, op1, op2, op3);
 }
@@ -83,7 +91,7 @@ svfloat64_t test_svnmls_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f16,_m,)(pg, op1, op2, op3);
 }
@@ -100,7 +108,7 @@ svfloat16_t test_svnmls_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f32,_m,)(pg, op1, op2, op3);
 }
@@ -117,7 +125,7 @@ svfloat32_t test_svnmls_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f64,_m,)(pg, op1, op2, op3);
 }
@@ -134,7 +142,7 @@ svfloat64_t test_svnmls_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f16,_x,)(pg, op1, op2, op3);
 }
@@ -151,7 +159,7 @@ svfloat16_t test_svnmls_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f32,_x,)(pg, op1, op2, op3);
 }
@@ -168,7 +176,7 @@ svfloat32_t test_svnmls_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_f64,_x,)(pg, op1, op2, op3);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svnmls_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svnmls_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmls_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f16,_z,)(pg, op1, op2, op3);
 }
@@ -214,7 +222,7 @@ svfloat16_t test_svnmls_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svnmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f32,_z,)(pg, op1, op2, op3);
 }
@@ -237,7 +245,7 @@ svfloat32_t test_svnmls_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svnmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f64,_z,)(pg, op1, op2, op3);
 }
@@ -258,7 +266,7 @@ svfloat64_t test_svnmls_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmls_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmls_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f16,_m,)(pg, op1, op2, op3);
 }
@@ -279,7 +287,7 @@ svfloat16_t test_svnmls_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f32,_m,)(pg, op1, op2, op3);
 }
@@ -300,7 +308,7 @@ svfloat32_t test_svnmls_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f64,_m,)(pg, op1, op2, op3);
 }
@@ -321,7 +329,7 @@ svfloat64_t test_svnmls_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmls_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmls_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f16,_x,)(pg, op1, op2, op3);
 }
@@ -342,7 +350,7 @@ svfloat16_t test_svnmls_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f32,_x,)(pg, op1, op2, op3);
 }
@@ -363,7 +371,7 @@ svfloat32_t test_svnmls_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmls_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmls,_n_f64,_x,)(pg, op1, op2, op3);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmsb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmsb.c
index e12b13d0469f1..1a5eea19cb486 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmsb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nmsb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svnmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f16,_z,)(pg, op1, op2, op3);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svnmsb_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svnmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f32,_z,)(pg, op1, op2, op3);
 }
@@ -66,7 +74,7 @@ svfloat32_t test_svnmsb_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svnmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f64,_z,)(pg, op1, op2, op3);
 }
@@ -83,7 +91,7 @@ svfloat64_t test_svnmsb_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f16,_m,)(pg, op1, op2, op3);
 }
@@ -100,7 +108,7 @@ svfloat16_t test_svnmsb_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f32,_m,)(pg, op1, op2, op3);
 }
@@ -117,7 +125,7 @@ svfloat32_t test_svnmsb_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f64,_m,)(pg, op1, op2, op3);
 }
@@ -134,7 +142,7 @@ svfloat64_t test_svnmsb_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP3:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3)
+svfloat16_t test_svnmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svfloat16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f16,_x,)(pg, op1, op2, op3);
 }
@@ -151,7 +159,7 @@ svfloat16_t test_svnmsb_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP3:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3)
+svfloat32_t test_svnmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f32,_x,)(pg, op1, op2, op3);
 }
@@ -168,7 +176,7 @@ svfloat32_t test_svnmsb_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP3:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3)
+svfloat64_t test_svnmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_f64,_x,)(pg, op1, op2, op3);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svnmsb_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svf
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svnmsb_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmsb_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f16,_z,)(pg, op1, op2, op3);
 }
@@ -214,7 +222,7 @@ svfloat16_t test_svnmsb_n_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svnmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f32,_z,)(pg, op1, op2, op3);
 }
@@ -237,7 +245,7 @@ svfloat32_t test_svnmsb_n_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svnmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f64,_z,)(pg, op1, op2, op3);
 }
@@ -258,7 +266,7 @@ svfloat64_t test_svnmsb_n_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmsb.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmsb_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmsb_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f16,_m,)(pg, op1, op2, op3);
 }
@@ -279,7 +287,7 @@ svfloat16_t test_svnmsb_n_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f32,_m,)(pg, op1, op2, op3);
 }
@@ -300,7 +308,7 @@ svfloat32_t test_svnmsb_n_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmsb.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f64,_m,)(pg, op1, op2, op3);
 }
@@ -321,7 +329,7 @@ svfloat64_t test_svnmsb_n_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[DOTSPLAT]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svnmsb_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3)
+svfloat16_t test_svnmsb_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, float16_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f16,_x,)(pg, op1, op2, op3);
 }
@@ -342,7 +350,7 @@ svfloat16_t test_svnmsb_n_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[DOTSPLAT]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svnmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3)
+svfloat32_t test_svnmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, float32_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f32,_x,)(pg, op1, op2, op3);
 }
@@ -363,7 +371,7 @@ svfloat32_t test_svnmsb_n_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, f
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[DOTSPLAT]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svnmsb_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3)
+svfloat64_t test_svnmsb_n_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, float64_t op3) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnmsb,_n_f64,_x,)(pg, op1, op2, op3);
 }
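
The preamble added to each test file above lets one source serve both RUN configurations: when built with `-target-feature +sme`, `__ARM_FEATURE_SME` is defined and every test function picks up the `__arm_streaming` attribute, so the shared SVE intrinsics are called from a function in the required mode; with `+sve` the macro expands to nothing. A minimal standalone sketch of the pattern (illustrative only, not part of the patch; the function name is made up):

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
#endif

// Compiles under +sve as a normal function, and under +sme (without +sve)
// as a streaming function, where the shared intrinsic remains legal.
svint8_t invert_active_lanes(svbool_t pg, svint8_t op) MODE_ATTR
{
  return svnot_s8_x(pg, op);
}
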
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nor.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nor.c
index d10b592dae815..ae09bed7fff72 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nor.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_nor.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.nor.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svnor_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svnor_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnor,_b,_z,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c
index 68e538b758a96..c329d9e1137a2 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_not.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svnot_s8_z(svbool_t pg, svint8_t op)
+svint8_t test_svnot_s8_z(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s8,_z,)(pg, op);
 }
@@ -41,7 +49,7 @@ svint8_t test_svnot_s8_z(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svnot_s16_z(svbool_t pg, svint16_t op)
+svint16_t test_svnot_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s16,_z,)(pg, op);
 }
@@ -58,7 +66,7 @@ svint16_t test_svnot_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svnot_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svnot_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s32,_z,)(pg, op);
 }
@@ -75,7 +83,7 @@ svint32_t test_svnot_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svnot_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svnot_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s64,_z,)(pg, op);
 }
@@ -90,7 +98,7 @@ svint64_t test_svnot_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svnot_u8_z(svbool_t pg, svuint8_t op)
+svuint8_t test_svnot_u8_z(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u8,_z,)(pg, op);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svnot_u8_z(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svnot_u16_z(svbool_t pg, svuint16_t op)
+svuint16_t test_svnot_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u16,_z,)(pg, op);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svnot_u16_z(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svnot_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svnot_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u32,_z,)(pg, op);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svnot_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svnot_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svnot_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u64,_z,)(pg, op);
 }
@@ -156,7 +164,7 @@ svuint64_t test_svnot_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
+svint8_t test_svnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s8,_m,)(inactive, pg, op);
 }
@@ -173,7 +181,7 @@ svint8_t test_svnot_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
+svint16_t test_svnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s16,_m,)(inactive, pg, op);
 }
@@ -190,7 +198,7 @@ svint16_t test_svnot_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s32,_m,)(inactive, pg, op);
 }
@@ -207,7 +215,7 @@ svint32_t test_svnot_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s64,_m,)(inactive, pg, op);
 }
@@ -222,7 +230,7 @@ svint64_t test_svnot_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
+svuint8_t test_svnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u8,_m,)(inactive, pg, op);
 }
@@ -239,7 +247,7 @@ svuint8_t test_svnot_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
+svuint16_t test_svnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u16,_m,)(inactive, pg, op);
 }
@@ -256,7 +264,7 @@ svuint16_t test_svnot_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u32,_m,)(inactive, pg, op);
 }
@@ -273,7 +281,7 @@ svuint32_t test_svnot_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u64,_m,)(inactive, pg, op);
 }
@@ -288,7 +296,7 @@ svuint64_t test_svnot_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svnot_s8_x(svbool_t pg, svint8_t op)
+svint8_t test_svnot_s8_x(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s8,_x,)(pg, op);
 }
@@ -305,7 +313,7 @@ svint8_t test_svnot_s8_x(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svnot_s16_x(svbool_t pg, svint16_t op)
+svint16_t test_svnot_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s16,_x,)(pg, op);
 }
@@ -322,7 +330,7 @@ svint16_t test_svnot_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svnot_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svnot_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s32,_x,)(pg, op);
 }
@@ -339,7 +347,7 @@ svint32_t test_svnot_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svnot_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svnot_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_s64,_x,)(pg, op);
 }
@@ -354,7 +362,7 @@ svint64_t test_svnot_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.not.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svnot_u8_x(svbool_t pg, svuint8_t op)
+svuint8_t test_svnot_u8_x(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u8,_x,)(pg, op);
 }
@@ -371,7 +379,7 @@ svuint8_t test_svnot_u8_x(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.not.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svnot_u16_x(svbool_t pg, svuint16_t op)
+svuint16_t test_svnot_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u16,_x,)(pg, op);
 }
@@ -388,7 +396,7 @@ svuint16_t test_svnot_u16_x(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svnot_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svnot_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u32,_x,)(pg, op);
 }
@@ -405,7 +413,7 @@ svuint32_t test_svnot_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.not.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svnot_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svnot_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_u64,_x,)(pg, op);
 }
@@ -420,7 +428,7 @@ svuint64_t test_svnot_u64_x(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.eor.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]], <vscale x 16 x i1> [[PG]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svnot_b_z(svbool_t pg, svbool_t op)
+svbool_t test_svnot_b_z(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svnot,_b,_z,)(pg, op);
 }
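
The `_z`, `_m` and `_x` suffixes exercised in this file map to the three predication policies visible in the generated IR above: zeroing (`zeroinitializer` for inactive lanes), merging (inactive lanes taken from the explicit `inactive` operand), and don't-care (`undef`). A small sketch contrasting the three, reusing the MODE_ATTR macro and `<arm_sve.h>` include from the test preamble (function names here are illustrative):

svint8_t not_zeroing(svbool_t pg, svint8_t op) MODE_ATTR
{
  return svnot_s8_z(pg, op);            // inactive lanes become zero
}

svint8_t not_merging(svint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
{
  return svnot_s8_m(inactive, pg, op);  // inactive lanes copied from 'inactive'
}

svint8_t not_dont_care(svbool_t pg, svint8_t op) MODE_ATTR
{
  return svnot_s8_x(pg, op);            // inactive lanes unspecified ('undef' in the IR)
}
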
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orn.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orn.c
index 6cec937818a17..43b6f32771b97 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orn.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orn.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orn.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svorn_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svorn_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorn,_b,_z,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orr.c
index 111ed80f96e62..259b031bb817e 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orr.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orr.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svorr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svorr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svorr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svorr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svorr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svorr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svorr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svorr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svorr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svorr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svorr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svorr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svorr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svorr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svorr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svorr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svorr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svorr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svorr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svorr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svorr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svorr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svorr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svorr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svorr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svorr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svorr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svorr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svorr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svorr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svorr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svorr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svorr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svorr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svorr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svorr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svorr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svorr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svorr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svorr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svorr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svorr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svorr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svorr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svorr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svorr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svorr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svorr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svorr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svorr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svorr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svorr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svorr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svorr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svorr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svorr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svorr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svorr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svorr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svorr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svorr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svorr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svorr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svorr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svorr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svorr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svorr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svorr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svorr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svorr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svorr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svorr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svorr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svorr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svorr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svorr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svorr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svorr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svorr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svorr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svorr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svorr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svorr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svorr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svorr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svorr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svorr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svorr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svorr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svorr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svorr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svorr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svorr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svorr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svorr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svorr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svorr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svorr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svorr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svorr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svorr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svorr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svorr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svorr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svorr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svorr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svorr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svorr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svorr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svorr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svorr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svorr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svorr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svorr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svorr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svorr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svorr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svorr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svorr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svorr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svorr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svorr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svorr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svorr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svorr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svorr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svorr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svorr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svorr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svorr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svorr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svorr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svorr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svorr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svorr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svorr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svorr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svorr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svorr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svorr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svorr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svorr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svorr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_n_u64,_x,)(pg, op1, op2);
 }
@@ -944,7 +952,7 @@ svuint64_t test_svorr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.orr.z.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svorr_b_z(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svorr_b_z(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorr,_b,_z,)(pg, op1, op2);
 }
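
Every call site in these tests goes through the SVE_ACLE_FUNC token-pasting helper, whose overloaded branch is visible in the preambles above. A sketch of both branches and what a call expands to (the #else branch is elided from these hunks, so the definition shown for it is an assumption):

#ifdef SVE_OVERLOADED_FORMS
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4   // assumed non-overloaded branch
#endif

// SVE_ACLE_FUNC(svorr,_u64,_x,)(pg, op1, op2) expands to
//   svorr_x(pg, op1, op2)       with SVE_OVERLOADED_FORMS defined, or
//   svorr_u64_x(pg, op1, op2)   otherwise.
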
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orv.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orv.c
index 74759eba210b2..548baecbc3e3b 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orv.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_orv.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-int8_t test_svorv_s8(svbool_t pg, svint8_t op)
+int8_t test_svorv_s8(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorv,_s8,,)(pg, op);
 }
@@ -41,7 +49,7 @@ int8_t test_svorv_s8(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-int16_t test_svorv_s16(svbool_t pg, svint16_t op)
+int16_t test_svorv_s16(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorv,_s16,,)(pg, op);
 }
@@ -58,7 +66,7 @@ int16_t test_svorv_s16(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svorv_s32(svbool_t pg, svint32_t op)
+int32_t test_svorv_s32(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorv,_s32,,)(pg, op);
 }
@@ -75,7 +83,7 @@ int32_t test_svorv_s32(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svorv_s64(svbool_t pg, svint64_t op)
+int64_t test_svorv_s64(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorv,_s64,,)(pg, op);
 }
@@ -90,7 +98,7 @@ int64_t test_svorv_s64(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.aarch64.sve.orv.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i8 [[TMP0]]
 //
-uint8_t test_svorv_u8(svbool_t pg, svuint8_t op)
+uint8_t test_svorv_u8(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorv,_u8,,)(pg, op);
 }
@@ -107,7 +115,7 @@ uint8_t test_svorv_u8(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i16 @llvm.aarch64.sve.orv.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i16 [[TMP1]]
 //
-uint16_t test_svorv_u16(svbool_t pg, svuint16_t op)
+uint16_t test_svorv_u16(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorv,_u16,,)(pg, op);
 }
@@ -124,7 +132,7 @@ uint16_t test_svorv_u16(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.orv.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svorv_u32(svbool_t pg, svuint32_t op)
+uint32_t test_svorv_u32(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorv,_u32,,)(pg, op);
 }
@@ -141,7 +149,7 @@ uint32_t test_svorv_u32(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.orv.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svorv_u64(svbool_t pg, svuint64_t op)
+uint64_t test_svorv_u64(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svorv,_u64,,)(pg, op);
 }
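
For context on the pattern repeated in each file above: with +sme but without +sve, the SVE intrinsics are only callable from streaming functions, so the new RUN line defines __ARM_FEATURE_SME and every test function picks up __arm_streaming through MODE_ATTR. A minimal standalone sketch of the same idea (the function name and comments are illustrative, not part of the patch):

  // Builds under either -target-feature +sve or -target-feature +sme.
  #include <arm_sve.h>

  #if defined __ARM_FEATURE_SME
  #define MODE_ATTR __arm_streaming  // +sme only: intrinsic is valid in streaming mode
  #else
  #define MODE_ATTR                  // +sve: valid in non-streaming mode as-is
  #endif

  // Bitwise-OR reduction over the active lanes of v.
  uint8_t or_reduce(svbool_t pg, svuint8_t v) MODE_ATTR {
    return svorv_u8(pg, v);
  }
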
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfalse.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfalse.c
index 846ec490e986c..375ed4ed96044 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfalse.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfalse.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -22,7 +30,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> zeroinitializer
 //
-svbool_t test_svpfalse_b()
+svbool_t test_svpfalse_b(void) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svpfalse,_b,,)();
 }
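
One detail worth noting in the hunk above: the parameter list changes from () to (void). In C an empty list is not a prototype, and these files are also compiled as C++ under -Wall -Werror, so spelling out (void) keeps the declaration well-formed in both languages once the MODE_ATTR attribute is appended (the patch does not state the exact diagnostic being avoided; this is a plausible reading). Sketch, reusing the tests' MODE_ATTR macro:

  // (void) gives a proper prototype in C; () would leave the parameters unspecified.
  svbool_t all_false(void) MODE_ATTR {
    return svpfalse_b();
  }
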
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfirst.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfirst.c
index 82960d0bad416..d27dda2eb26bd 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfirst.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pfirst.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pfirst.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpfirst_b(svbool_t pg, svbool_t op)
+svbool_t test_svpfirst_b(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svpfirst,_b,,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pnext.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pnext.c
index 9b23c760700af..7bbbccdd22f08 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pnext.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_pnext.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svpnext_b8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pnext.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
@@ -15,7 +23,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.pnext.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svpnext_b8(svbool_t pg, svbool_t op)
+svbool_t test_svpnext_b8(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svpnext_b8(pg, op);
 }
@@ -36,7 +44,7 @@ svbool_t test_svpnext_b8(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP2]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP3]]
 //
-svbool_t test_svpnext_b16(svbool_t pg, svbool_t op)
+svbool_t test_svpnext_b16(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svpnext_b16(pg, op);
 }
@@ -57,7 +65,7 @@ svbool_t test_svpnext_b16(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP2]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP3]]
 //
-svbool_t test_svpnext_b32(svbool_t pg, svbool_t op)
+svbool_t test_svpnext_b32(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svpnext_b32(pg, op);
 }
@@ -78,7 +86,7 @@ svbool_t test_svpnext_b32(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP2]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP3]]
 //
-svbool_t test_svpnext_b64(svbool_t pg, svbool_t op)
+svbool_t test_svpnext_b64(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svpnext_b64(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
index ee183399671c4..767b1d0ddc960 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb(svbool_t pg, const void *base)
+void test_svprfb(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PLDL1KEEP);
 }
@@ -39,7 +47,7 @@ void test_svprfb(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_1(svbool_t pg, const void *base)
+void test_svprfb_1(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PLDL1STRM);
 }
@@ -54,7 +62,7 @@ void test_svprfb_1(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_2(svbool_t pg, const void *base)
+void test_svprfb_2(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PLDL2KEEP);
 }
@@ -69,7 +77,7 @@ void test_svprfb_2(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_3(svbool_t pg, const void *base)
+void test_svprfb_3(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PLDL2STRM);
 }
@@ -84,7 +92,7 @@ void test_svprfb_3(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_4(svbool_t pg, const void *base)
+void test_svprfb_4(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PLDL3KEEP);
 }
@@ -99,7 +107,7 @@ void test_svprfb_4(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 5)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_5(svbool_t pg, const void *base)
+void test_svprfb_5(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PLDL3STRM);
 }
@@ -114,7 +122,7 @@ void test_svprfb_5(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 8)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_6(svbool_t pg, const void *base)
+void test_svprfb_6(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PSTL1KEEP);
 }
@@ -129,7 +137,7 @@ void test_svprfb_6(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 9)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_7(svbool_t pg, const void *base)
+void test_svprfb_7(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PSTL1STRM);
 }
@@ -144,7 +152,7 @@ void test_svprfb_7(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 10)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_8(svbool_t pg, const void *base)
+void test_svprfb_8(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PSTL2KEEP);
 }
@@ -159,7 +167,7 @@ void test_svprfb_8(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 11)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_9(svbool_t pg, const void *base)
+void test_svprfb_9(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PSTL2STRM);
 }
@@ -174,7 +182,7 @@ void test_svprfb_9(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 12)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_10(svbool_t pg, const void *base)
+void test_svprfb_10(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PSTL3KEEP);
 }
@@ -189,7 +197,7 @@ void test_svprfb_10(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]], i32 13)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_11(svbool_t pg, const void *base)
+void test_svprfb_11(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfb(pg, base, SV_PSTL3STRM);
 }
@@ -206,11 +214,13 @@ void test_svprfb_11(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]], i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfb_vnum(svbool_t pg, const void *base, int64_t vnum)
+void test_svprfb_vnum(svbool_t pg, const void *base, int64_t vnum) MODE_ATTR
 {
   return svprfb_vnum(pg, base, vnum, SV_PLDL1KEEP);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svprfb_gather_u32base(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -347,3 +357,5 @@ void test_svprfb_gather_u64base_offset(svbool_t pg, svuint64_t bases, int64_t of
 {
   return SVE_ACLE_FUNC(svprfb_gather,_u64base,_offset,)(pg, bases, offset, SV_PLDL1KEEP);
 }
+
+#endif
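
The #ifndef __ARM_FEATURE_SME / #endif pair added above fences off the gather-prefetch tests: the gather forms take a vector of base addresses and belong to the SVE-only subset rather than the streaming-compatible one, so they cannot be exercised by the +sme-only RUN line. A small sketch of the guarded shape (helper name is illustrative):

  #ifndef __ARM_FEATURE_SME
  // Gather prefetch from a vector of 32-bit base addresses; SVE-only,
  // so this whole block disappears when compiling for +sme without +sve.
  void prefetch_gather(svbool_t pg, svuint32_t bases) {
    svprfb_gather_u32base(pg, bases, SV_PLDL1KEEP);
  }
  #endif
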
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
index 360fb5be01090..8d6ee3024e703 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfd.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd(svbool_t pg, const void *base)
+void test_svprfd(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PLDL1KEEP);
 }
@@ -43,7 +51,7 @@ void test_svprfd(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_1(svbool_t pg, const void *base)
+void test_svprfd_1(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PLDL1STRM);
 }
@@ -60,7 +68,7 @@ void test_svprfd_1(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_2(svbool_t pg, const void *base)
+void test_svprfd_2(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PLDL2KEEP);
 }
@@ -77,7 +85,7 @@ void test_svprfd_2(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_3(svbool_t pg, const void *base)
+void test_svprfd_3(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PLDL2STRM);
 }
@@ -94,7 +102,7 @@ void test_svprfd_3(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_4(svbool_t pg, const void *base)
+void test_svprfd_4(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PLDL3KEEP);
 }
@@ -111,7 +119,7 @@ void test_svprfd_4(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 5)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_5(svbool_t pg, const void *base)
+void test_svprfd_5(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PLDL3STRM);
 }
@@ -128,7 +136,7 @@ void test_svprfd_5(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 8)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_6(svbool_t pg, const void *base)
+void test_svprfd_6(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PSTL1KEEP);
 }
@@ -145,7 +153,7 @@ void test_svprfd_6(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 9)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_7(svbool_t pg, const void *base)
+void test_svprfd_7(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PSTL1STRM);
 }
@@ -162,7 +170,7 @@ void test_svprfd_7(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 10)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_8(svbool_t pg, const void *base)
+void test_svprfd_8(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PSTL2KEEP);
 }
@@ -179,7 +187,7 @@ void test_svprfd_8(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 11)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_9(svbool_t pg, const void *base)
+void test_svprfd_9(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PSTL2STRM);
 }
@@ -196,7 +204,7 @@ void test_svprfd_9(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 12)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_10(svbool_t pg, const void *base)
+void test_svprfd_10(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PSTL3KEEP);
 }
@@ -213,7 +221,7 @@ void test_svprfd_10(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 13)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_11(svbool_t pg, const void *base)
+void test_svprfd_11(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfd(pg, base, SV_PSTL3STRM);
 }
@@ -232,11 +240,13 @@ void test_svprfd_11(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv2i1(<vscale x 2 x i1> [[TMP0]], ptr [[TMP1]], i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfd_vnum(svbool_t pg, const void *base, int64_t vnum)
+void test_svprfd_vnum(svbool_t pg, const void *base, int64_t vnum) MODE_ATTR
 {
   return svprfd_vnum(pg, base, vnum, SV_PLDL1KEEP);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svprfd_gather_u32base(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -376,3 +386,5 @@ void test_svprfd_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t ind
 {
   return SVE_ACLE_FUNC(svprfd_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
index 40c0993f14529..d7b9c47372763 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfh.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh(svbool_t pg, const void *base)
+void test_svprfh(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PLDL1KEEP);
 }
@@ -43,7 +51,7 @@ void test_svprfh(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_1(svbool_t pg, const void *base)
+void test_svprfh_1(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PLDL1STRM);
 }
@@ -60,7 +68,7 @@ void test_svprfh_1(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_2(svbool_t pg, const void *base)
+void test_svprfh_2(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PLDL2KEEP);
 }
@@ -77,7 +85,7 @@ void test_svprfh_2(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_3(svbool_t pg, const void *base)
+void test_svprfh_3(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PLDL2STRM);
 }
@@ -94,7 +102,7 @@ void test_svprfh_3(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_4(svbool_t pg, const void *base)
+void test_svprfh_4(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PLDL3KEEP);
 }
@@ -111,7 +119,7 @@ void test_svprfh_4(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 5)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_5(svbool_t pg, const void *base)
+void test_svprfh_5(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PLDL3STRM);
 }
@@ -128,7 +136,7 @@ void test_svprfh_5(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 8)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_6(svbool_t pg, const void *base)
+void test_svprfh_6(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PSTL1KEEP);
 }
@@ -145,7 +153,7 @@ void test_svprfh_6(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 9)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_7(svbool_t pg, const void *base)
+void test_svprfh_7(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PSTL1STRM);
 }
@@ -162,7 +170,7 @@ void test_svprfh_7(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 10)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_8(svbool_t pg, const void *base)
+void test_svprfh_8(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PSTL2KEEP);
 }
@@ -179,7 +187,7 @@ void test_svprfh_8(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 11)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_9(svbool_t pg, const void *base)
+void test_svprfh_9(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PSTL2STRM);
 }
@@ -196,7 +204,7 @@ void test_svprfh_9(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 12)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_10(svbool_t pg, const void *base)
+void test_svprfh_10(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PSTL3KEEP);
 }
@@ -213,7 +221,7 @@ void test_svprfh_10(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 13)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_11(svbool_t pg, const void *base)
+void test_svprfh_11(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfh(pg, base, SV_PSTL3STRM);
 }
@@ -232,11 +240,13 @@ void test_svprfh_11(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv8i1(<vscale x 8 x i1> [[TMP0]], ptr [[TMP1]], i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfh_vnum(svbool_t pg, const void *base, int64_t vnum)
+void test_svprfh_vnum(svbool_t pg, const void *base, int64_t vnum) MODE_ATTR
 {
   return svprfh_vnum(pg, base, vnum, SV_PLDL1KEEP);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svprfh_gather_u32base(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -376,3 +386,5 @@ void test_svprfh_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t ind
 {
   return SVE_ACLE_FUNC(svprfh_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
index a0d8203e64f6c..5e29c4408b467 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_prfw.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw(svbool_t pg, const void *base)
+void test_svprfw(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PLDL1KEEP);
 }
@@ -43,7 +51,7 @@ void test_svprfw(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_1(svbool_t pg, const void *base)
+void test_svprfw_1(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PLDL1STRM);
 }
@@ -60,7 +68,7 @@ void test_svprfw_1(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_2(svbool_t pg, const void *base)
+void test_svprfw_2(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PLDL2KEEP);
 }
@@ -77,7 +85,7 @@ void test_svprfw_2(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_3(svbool_t pg, const void *base)
+void test_svprfw_3(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PLDL2STRM);
 }
@@ -94,7 +102,7 @@ void test_svprfw_3(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 4)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_4(svbool_t pg, const void *base)
+void test_svprfw_4(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PLDL3KEEP);
 }
@@ -111,7 +119,7 @@ void test_svprfw_4(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 5)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_5(svbool_t pg, const void *base)
+void test_svprfw_5(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PLDL3STRM);
 }
@@ -128,7 +136,7 @@ void test_svprfw_5(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 8)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_6(svbool_t pg, const void *base)
+void test_svprfw_6(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PSTL1KEEP);
 }
@@ -145,7 +153,7 @@ void test_svprfw_6(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 9)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_7(svbool_t pg, const void *base)
+void test_svprfw_7(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PSTL1STRM);
 }
@@ -162,7 +170,7 @@ void test_svprfw_7(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 10)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_8(svbool_t pg, const void *base)
+void test_svprfw_8(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PSTL2KEEP);
 }
@@ -179,7 +187,7 @@ void test_svprfw_8(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 11)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_9(svbool_t pg, const void *base)
+void test_svprfw_9(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PSTL2STRM);
 }
@@ -196,7 +204,7 @@ void test_svprfw_9(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 12)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_10(svbool_t pg, const void *base)
+void test_svprfw_10(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PSTL3KEEP);
 }
@@ -213,7 +221,7 @@ void test_svprfw_10(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]], i32 13)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_11(svbool_t pg, const void *base)
+void test_svprfw_11(svbool_t pg, const void *base) MODE_ATTR
 {
   return svprfw(pg, base, SV_PSTL3STRM);
 }
@@ -232,11 +240,13 @@ void test_svprfw_11(svbool_t pg, const void *base)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.prf.nxv4i1(<vscale x 4 x i1> [[TMP0]], ptr [[TMP1]], i32 0)
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svprfw_vnum(svbool_t pg, const void *base, int64_t vnum)
+void test_svprfw_vnum(svbool_t pg, const void *base, int64_t vnum) MODE_ATTR
 {
   return svprfw_vnum(pg, base, vnum, SV_PLDL1KEEP);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svprfw_gather_u32base(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -376,3 +386,5 @@ void test_svprfw_gather_u64base_index(svbool_t pg, svuint64_t bases, int64_t ind
 {
   return SVE_ACLE_FUNC(svprfw_gather,_u64base,_index,)(pg, bases, index, SV_PLDL1KEEP);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptest.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptest.c
index 4a640dd69adaf..8f12562b994ab 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptest.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptest.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svptest_any(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
@@ -15,7 +23,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i1 [[TMP0]]
 //
-bool test_svptest_any(svbool_t pg, svbool_t op)
+bool test_svptest_any(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svptest_any(pg, op);
 }
@@ -30,7 +38,7 @@ bool test_svptest_any(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sve.ptest.first.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i1 [[TMP0]]
 //
-bool test_svptest_first(svbool_t pg, svbool_t op)
+bool test_svptest_first(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svptest_first(pg, op);
 }
@@ -45,7 +53,7 @@ bool test_svptest_first(svbool_t pg, svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i1 @llvm.aarch64.sve.ptest.last.nxv16i1(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret i1 [[TMP0]]
 //
-bool test_svptest_last(svbool_t pg, svbool_t op)
+bool test_svptest_last(svbool_t pg, svbool_t op) MODE_ATTR
 {
   return svptest_last(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptrue.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptrue.c
index 808f6aa061f57..a16a02baa11bc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptrue.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ptrue.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svptrue_b8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
@@ -15,7 +23,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_b8()
+svbool_t test_svptrue_b8(void) MODE_ATTR
 {
   return svptrue_b8();
 }
@@ -32,7 +40,7 @@ svbool_t test_svptrue_b8()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svptrue_b16()
+svbool_t test_svptrue_b16(void) MODE_ATTR
 {
   return svptrue_b16();
 }
@@ -49,7 +57,7 @@ svbool_t test_svptrue_b16()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svptrue_b32()
+svbool_t test_svptrue_b32(void) MODE_ATTR
 {
   return svptrue_b32();
 }
@@ -66,7 +74,7 @@ svbool_t test_svptrue_b32()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svptrue_b64()
+svbool_t test_svptrue_b64(void) MODE_ATTR
 {
   return svptrue_b64();
 }
@@ -81,7 +89,7 @@ svbool_t test_svptrue_b64()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8()
+svbool_t test_svptrue_pat_b8(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_POW2);
 }
@@ -96,7 +104,7 @@ svbool_t test_svptrue_pat_b8()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_1()
+svbool_t test_svptrue_pat_b8_1(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL1);
 }
@@ -111,7 +119,7 @@ svbool_t test_svptrue_pat_b8_1()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 2)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_2()
+svbool_t test_svptrue_pat_b8_2(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL2);
 }
@@ -126,7 +134,7 @@ svbool_t test_svptrue_pat_b8_2()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_3()
+svbool_t test_svptrue_pat_b8_3(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL3);
 }
@@ -141,7 +149,7 @@ svbool_t test_svptrue_pat_b8_3()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 4)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_4()
+svbool_t test_svptrue_pat_b8_4(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL4);
 }
@@ -156,7 +164,7 @@ svbool_t test_svptrue_pat_b8_4()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 5)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_5()
+svbool_t test_svptrue_pat_b8_5(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL5);
 }
@@ -171,7 +179,7 @@ svbool_t test_svptrue_pat_b8_5()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 6)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_6()
+svbool_t test_svptrue_pat_b8_6(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL6);
 }
@@ -186,7 +194,7 @@ svbool_t test_svptrue_pat_b8_6()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 7)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_7()
+svbool_t test_svptrue_pat_b8_7(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL7);
 }
@@ -201,7 +209,7 @@ svbool_t test_svptrue_pat_b8_7()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 8)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_8()
+svbool_t test_svptrue_pat_b8_8(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL8);
 }
@@ -216,7 +224,7 @@ svbool_t test_svptrue_pat_b8_8()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 9)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_9()
+svbool_t test_svptrue_pat_b8_9(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL16);
 }
@@ -231,7 +239,7 @@ svbool_t test_svptrue_pat_b8_9()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 10)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_10()
+svbool_t test_svptrue_pat_b8_10(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL32);
 }
@@ -246,7 +254,7 @@ svbool_t test_svptrue_pat_b8_10()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_11()
+svbool_t test_svptrue_pat_b8_11(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL64);
 }
@@ -261,7 +269,7 @@ svbool_t test_svptrue_pat_b8_11()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 12)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_12()
+svbool_t test_svptrue_pat_b8_12(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL128);
 }
@@ -276,7 +284,7 @@ svbool_t test_svptrue_pat_b8_12()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 13)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_13()
+svbool_t test_svptrue_pat_b8_13(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_VL256);
 }
@@ -291,7 +299,7 @@ svbool_t test_svptrue_pat_b8_13()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 29)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_14()
+svbool_t test_svptrue_pat_b8_14(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_MUL4);
 }
@@ -306,7 +314,7 @@ svbool_t test_svptrue_pat_b8_14()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 30)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_15()
+svbool_t test_svptrue_pat_b8_15(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_MUL3);
 }
@@ -321,7 +329,7 @@ svbool_t test_svptrue_pat_b8_15()
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svptrue_pat_b8_16()
+svbool_t test_svptrue_pat_b8_16(void) MODE_ATTR
 {
   return svptrue_pat_b8(SV_ALL);
 }
@@ -338,7 +346,7 @@ svbool_t test_svptrue_pat_b8_16()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svptrue_pat_b16()
+svbool_t test_svptrue_pat_b16(void) MODE_ATTR
 {
   return svptrue_pat_b16(SV_POW2);
 }
@@ -355,7 +363,7 @@ svbool_t test_svptrue_pat_b16()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svptrue_pat_b32()
+svbool_t test_svptrue_pat_b32(void) MODE_ATTR
 {
   return svptrue_pat_b32(SV_VL1);
 }
@@ -372,7 +380,7 @@ svbool_t test_svptrue_pat_b32()
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svptrue_pat_b64()
+svbool_t test_svptrue_pat_b64(void) MODE_ATTR
 {
   return svptrue_pat_b64(SV_VL2);
 }
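
The CHECK lines in this file also make the svpattern encoding visible: each svptrue_pat_b* call lowers to llvm.aarch64.sve.ptrue with the pattern as an i32 immediate. Reading the values straight out of the checks above: SV_POW2 = 0, SV_VL1..SV_VL8 = 1..8, SV_VL16 = 9, SV_VL32 = 10, SV_VL64 = 11, SV_VL128 = 12, SV_VL256 = 13, SV_MUL4 = 29, SV_MUL3 = 30, SV_ALL = 31. For example (illustrative function name, MODE_ATTR as in the tests):

  // Predicate with exactly the first three byte lanes active;
  // lowers to @llvm.aarch64.sve.ptrue.nxv16i1(i32 3) per the checks above.
  svbool_t first_three_lanes(void) MODE_ATTR {
    return svptrue_pat_b8(SV_VL3);
  }
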
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qadd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qadd.c
index b536bcb52a50a..33a52973deadc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qadd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qadd.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svqadd_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svqadd_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svqadd_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svqadd_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svqadd_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svqadd_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svqadd_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svqadd_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svqadd_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svqadd_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svqadd_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svqadd_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svqadd_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svqadd_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svqadd_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svqadd_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svqadd_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svqadd_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svqadd_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svqadd_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svqadd_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svqadd_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svqadd_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_u64,,)(op1, op2);
 }
@@ -148,7 +156,7 @@ svuint64_t test_svqadd_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svqadd_n_s8(svint8_t op1, int8_t op2)
+svint8_t test_svqadd_n_s8(svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_n_s8,,)(op1, op2);
 }
@@ -167,7 +175,7 @@ svint8_t test_svqadd_n_s8(svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svqadd_n_s16(svint16_t op1, int16_t op2)
+svint16_t test_svqadd_n_s16(svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_n_s16,,)(op1, op2);
 }
@@ -186,7 +194,7 @@ svint16_t test_svqadd_n_s16(svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svqadd_n_s32(svint32_t op1, int32_t op2)
+svint32_t test_svqadd_n_s32(svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_n_s32,,)(op1, op2);
 }
@@ -205,7 +213,7 @@ svint32_t test_svqadd_n_s32(svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svqadd_n_s64(svint64_t op1, int64_t op2)
+svint64_t test_svqadd_n_s64(svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_n_s64,,)(op1, op2);
 }
@@ -224,7 +232,7 @@ svint64_t test_svqadd_n_s64(svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svqadd_n_u8(svuint8_t op1, uint8_t op2)
+svuint8_t test_svqadd_n_u8(svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_n_u8,,)(op1, op2);
 }
@@ -243,7 +251,7 @@ svuint8_t test_svqadd_n_u8(svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svqadd_n_u16(svuint16_t op1, uint16_t op2)
+svuint16_t test_svqadd_n_u16(svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_n_u16,,)(op1, op2);
 }
@@ -262,7 +270,7 @@ svuint16_t test_svqadd_n_u16(svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svqadd_n_u32(svuint32_t op1, uint32_t op2)
+svuint32_t test_svqadd_n_u32(svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_n_u32,,)(op1, op2);
 }
@@ -281,7 +289,7 @@ svuint32_t test_svqadd_n_u32(svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svqadd_n_u64(svuint64_t op1, uint64_t op2)
+svuint64_t test_svqadd_n_u64(svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqadd,_n_u64,,)(op1, op2);
 }
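
A brief note on the SVE_ACLE_FUNC macro these tests share: under SVE_OVERLOADED_FORMS the A1##A3 paste drops the type suffix and produces the overloaded intrinsic name, while the other branch (outside these hunks) presumably pastes the full suffixed spelling, so one test body exercises both forms. Applied to the saturating add above (illustrative wrapper name):

  // SVE_ACLE_FUNC(svqadd,_n_s64,,) expands to:
  //   overloaded build: svqadd        (type resolved from the arguments)
  //   suffixed build:   svqadd_n_s64  (explicit form)
  svint64_t sat_add(svint64_t v, int64_t s) MODE_ATTR {
    return SVE_ACLE_FUNC(svqadd,_n_s64,,)(v, s);
  }
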
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecb.c
index 70fff215aa3b6..3a4c8e561e118 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecb.n32(i32 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecb_n_s32(int32_t op)
+int32_t test_svqdecb_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb,_n_s32,,)(op, 1);
 }
@@ -39,7 +47,7 @@ int32_t test_svqdecb_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecb.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecb_n_s32_1(int32_t op)
+int32_t test_svqdecb_n_s32_1(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb,_n_s32,,)(op, 16);
 }
@@ -54,7 +62,7 @@ int32_t test_svqdecb_n_s32_1(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecb.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdecb_n_s64(int64_t op)
+int64_t test_svqdecb_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb,_n_s64,,)(op, 1);
 }
@@ -69,7 +77,7 @@ int64_t test_svqdecb_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecb.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdecb_n_u32(uint32_t op)
+uint32_t test_svqdecb_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb,_n_u32,,)(op, 16);
 }
@@ -84,7 +92,7 @@ uint32_t test_svqdecb_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecb.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdecb_n_u64(uint64_t op)
+uint64_t test_svqdecb_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb,_n_u64,,)(op, 1);
 }
@@ -99,7 +107,7 @@ uint64_t test_svqdecb_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecb.n32(i32 [[OP:%.*]], i32 0, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecb_pat_n_s32(int32_t op)
+int32_t test_svqdecb_pat_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb_pat,_n_s32,,)(op, SV_POW2, 16);
 }
@@ -114,7 +122,7 @@ int32_t test_svqdecb_pat_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecb.n64(i64 [[OP:%.*]], i32 1, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdecb_pat_n_s64(int64_t op)
+int64_t test_svqdecb_pat_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb_pat,_n_s64,,)(op, SV_VL1, 1);
 }
@@ -129,7 +137,7 @@ int64_t test_svqdecb_pat_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecb.n32(i32 [[OP:%.*]], i32 2, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdecb_pat_n_u32(uint32_t op)
+uint32_t test_svqdecb_pat_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb_pat,_n_u32,,)(op, SV_VL2, 16);
 }
@@ -144,7 +152,7 @@ uint32_t test_svqdecb_pat_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecb.n64(i64 [[OP:%.*]], i32 3, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdecb_pat_n_u64(uint64_t op)
+uint64_t test_svqdecb_pat_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecb_pat,_n_u64,,)(op, SV_VL3, 1);
 }
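
As the FileCheck lines in these _pat tests show, the svpattern enumerators
lower to the immediate pattern operand of the underlying intrinsic: SV_POW2
is 0, SV_VL1 through SV_VL8 are 1 through 8, SV_VL16 through SV_VL256 are 9
through 13, SV_MUL4 is 29, SV_MUL3 is 30, and SV_ALL is 31. A sketch of the
pattern form, with an illustrative function name that is not part of the
patch:

    // Saturating decrement of a scalar by 16 times the number of 8-bit
    // elements selected by the SV_POW2 pattern. Streaming-compatible, so
    // it is valid with only +sme when called from a streaming function.
    #include <arm_sve.h>

    int32_t step_down(int32_t counter) __arm_streaming {
      return svqdecb_pat_n_s32(counter, SV_POW2, 16);
    }
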
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecd.c
index 3b1fcf11ef509..26020908c5d92 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecd.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecd.n32(i32 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecd_n_s32(int32_t op)
+int32_t test_svqdecd_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd,_n_s32,,)(op, 1);
 }
@@ -39,7 +47,7 @@ int32_t test_svqdecd_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecd.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecd_n_s32_1(int32_t op)
+int32_t test_svqdecd_n_s32_1(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd,_n_s32,,)(op, 16);
 }
@@ -54,7 +62,7 @@ int32_t test_svqdecd_n_s32_1(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecd.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdecd_n_s64(int64_t op)
+int64_t test_svqdecd_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd,_n_s64,,)(op, 1);
 }
@@ -69,7 +77,7 @@ int64_t test_svqdecd_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecd.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdecd_n_u32(uint32_t op)
+uint32_t test_svqdecd_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd,_n_u32,,)(op, 16);
 }
@@ -84,7 +92,7 @@ uint32_t test_svqdecd_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecd.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdecd_n_u64(uint64_t op)
+uint64_t test_svqdecd_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd,_n_u64,,)(op, 1);
 }
@@ -99,7 +107,7 @@ uint64_t test_svqdecd_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecd.n32(i32 [[OP:%.*]], i32 4, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecd_pat_n_s32(int32_t op)
+int32_t test_svqdecd_pat_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd_pat,_n_s32,,)(op, SV_VL4, 16);
 }
@@ -114,7 +122,7 @@ int32_t test_svqdecd_pat_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecd.n64(i64 [[OP:%.*]], i32 5, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdecd_pat_n_s64(int64_t op)
+int64_t test_svqdecd_pat_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd_pat,_n_s64,,)(op, SV_VL5, 1);
 }
@@ -129,7 +137,7 @@ int64_t test_svqdecd_pat_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecd.n32(i32 [[OP:%.*]], i32 6, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdecd_pat_n_u32(uint32_t op)
+uint32_t test_svqdecd_pat_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd_pat,_n_u32,,)(op, SV_VL6, 16);
 }
@@ -144,7 +152,7 @@ uint32_t test_svqdecd_pat_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecd.n64(i64 [[OP:%.*]], i32 7, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdecd_pat_n_u64(uint64_t op)
+uint64_t test_svqdecd_pat_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd_pat,_n_u64,,)(op, SV_VL7, 1);
 }
@@ -159,7 +167,7 @@ uint64_t test_svqdecd_pat_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqdecd.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svqdecd_s64(svint64_t op)
+svint64_t test_svqdecd_s64(svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd,_s64,,)(op, 16);
 }
@@ -174,7 +182,7 @@ svint64_t test_svqdecd_s64(svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecd.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svqdecd_u64(svuint64_t op)
+svuint64_t test_svqdecd_u64(svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd,_u64,,)(op, 1);
 }
@@ -189,7 +197,7 @@ svuint64_t test_svqdecd_u64(svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqdecd.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], i32 8, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svqdecd_pat_s64(svint64_t op)
+svint64_t test_svqdecd_pat_s64(svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd_pat,_s64,,)(op, SV_VL8, 16);
 }
@@ -204,7 +212,7 @@ svint64_t test_svqdecd_pat_s64(svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecd.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], i32 9, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svqdecd_pat_u64(svuint64_t op)
+svuint64_t test_svqdecd_pat_u64(svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecd_pat,_u64,,)(op, SV_VL16, 1);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdech.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdech.c
index 847113c986fbb..2e851f1cbbbd9 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdech.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdech.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdech.n32(i32 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdech_n_s32(int32_t op)
+int32_t test_svqdech_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech,_n_s32,,)(op, 1);
 }
@@ -39,7 +47,7 @@ int32_t test_svqdech_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdech.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdech_n_s32_1(int32_t op)
+int32_t test_svqdech_n_s32_1(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech,_n_s32,,)(op, 16);
 }
@@ -54,7 +62,7 @@ int32_t test_svqdech_n_s32_1(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdech.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdech_n_s64(int64_t op)
+int64_t test_svqdech_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech,_n_s64,,)(op, 1);
 }
@@ -69,7 +77,7 @@ int64_t test_svqdech_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdech.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdech_n_u32(uint32_t op)
+uint32_t test_svqdech_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech,_n_u32,,)(op, 16);
 }
@@ -84,7 +92,7 @@ uint32_t test_svqdech_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdech.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdech_n_u64(uint64_t op)
+uint64_t test_svqdech_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech,_n_u64,,)(op, 1);
 }
@@ -99,7 +107,7 @@ uint64_t test_svqdech_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdech.n32(i32 [[OP:%.*]], i32 10, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdech_pat_n_s32(int32_t op)
+int32_t test_svqdech_pat_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech_pat,_n_s32,,)(op, SV_VL32, 16);
 }
@@ -114,7 +122,7 @@ int32_t test_svqdech_pat_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdech.n64(i64 [[OP:%.*]], i32 11, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdech_pat_n_s64(int64_t op)
+int64_t test_svqdech_pat_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech_pat,_n_s64,,)(op, SV_VL64, 1);
 }
@@ -129,7 +137,7 @@ int64_t test_svqdech_pat_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdech.n32(i32 [[OP:%.*]], i32 12, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdech_pat_n_u32(uint32_t op)
+uint32_t test_svqdech_pat_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech_pat,_n_u32,,)(op, SV_VL128, 16);
 }
@@ -144,7 +152,7 @@ uint32_t test_svqdech_pat_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdech.n64(i64 [[OP:%.*]], i32 13, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdech_pat_n_u64(uint64_t op)
+uint64_t test_svqdech_pat_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech_pat,_n_u64,,)(op, SV_VL256, 1);
 }
@@ -159,7 +167,7 @@ uint64_t test_svqdech_pat_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqdech.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svqdech_s16(svint16_t op)
+svint16_t test_svqdech_s16(svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech,_s16,,)(op, 16);
 }
@@ -174,7 +182,7 @@ svint16_t test_svqdech_s16(svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqdech.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svqdech_u16(svuint16_t op)
+svuint16_t test_svqdech_u16(svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech,_u16,,)(op, 1);
 }
@@ -189,7 +197,7 @@ svuint16_t test_svqdech_u16(svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqdech.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], i32 29, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svqdech_pat_s16(svint16_t op)
+svint16_t test_svqdech_pat_s16(svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech_pat,_s16,,)(op, SV_MUL4, 16);
 }
@@ -204,7 +212,7 @@ svint16_t test_svqdech_pat_s16(svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqdech.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], i32 30, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svqdech_pat_u16(svuint16_t op)
+svuint16_t test_svqdech_pat_u16(svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdech_pat,_u16,,)(op, SV_MUL3, 1);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecp.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecp.c
index ca24390d6cf7a..a88f826a18e27 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecp.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecp.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecp.n32.nxv16i1(i32 [[OP:%.*]], <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecp_n_s32_b8(int32_t op, svbool_t pg)
+int32_t test_svqdecp_n_s32_b8(int32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_s32,_b8,)(op, pg);
 }
@@ -41,7 +49,7 @@ int32_t test_svqdecp_n_s32_b8(int32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecp.n32.nxv8i1(i32 [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svqdecp_n_s32_b16(int32_t op, svbool_t pg)
+int32_t test_svqdecp_n_s32_b16(int32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_s32,_b16,)(op, pg);
 }
@@ -58,7 +66,7 @@ int32_t test_svqdecp_n_s32_b16(int32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecp.n32.nxv4i1(i32 [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svqdecp_n_s32_b32(int32_t op, svbool_t pg)
+int32_t test_svqdecp_n_s32_b32(int32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_s32,_b32,)(op, pg);
 }
@@ -75,7 +83,7 @@ int32_t test_svqdecp_n_s32_b32(int32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecp.n32.nxv2i1(i32 [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svqdecp_n_s32_b64(int32_t op, svbool_t pg)
+int32_t test_svqdecp_n_s32_b64(int32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_s32,_b64,)(op, pg);
 }
@@ -90,7 +98,7 @@ int32_t test_svqdecp_n_s32_b64(int32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecp.n64.nxv16i1(i64 [[OP:%.*]], <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdecp_n_s64_b8(int64_t op, svbool_t pg)
+int64_t test_svqdecp_n_s64_b8(int64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_s64,_b8,)(op, pg);
 }
@@ -107,7 +115,7 @@ int64_t test_svqdecp_n_s64_b8(int64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecp.n64.nxv8i1(i64 [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svqdecp_n_s64_b16(int64_t op, svbool_t pg)
+int64_t test_svqdecp_n_s64_b16(int64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_s64,_b16,)(op, pg);
 }
@@ -124,7 +132,7 @@ int64_t test_svqdecp_n_s64_b16(int64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecp.n64.nxv4i1(i64 [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svqdecp_n_s64_b32(int64_t op, svbool_t pg)
+int64_t test_svqdecp_n_s64_b32(int64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_s64,_b32,)(op, pg);
 }
@@ -141,7 +149,7 @@ int64_t test_svqdecp_n_s64_b32(int64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecp.n64.nxv2i1(i64 [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svqdecp_n_s64_b64(int64_t op, svbool_t pg)
+int64_t test_svqdecp_n_s64_b64(int64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_s64,_b64,)(op, pg);
 }
@@ -156,7 +164,7 @@ int64_t test_svqdecp_n_s64_b64(int64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecp.n32.nxv16i1(i32 [[OP:%.*]], <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdecp_n_u32_b8(uint32_t op, svbool_t pg)
+uint32_t test_svqdecp_n_u32_b8(uint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_u32,_b8,)(op, pg);
 }
@@ -173,7 +181,7 @@ uint32_t test_svqdecp_n_u32_b8(uint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecp.n32.nxv8i1(i32 [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svqdecp_n_u32_b16(uint32_t op, svbool_t pg)
+uint32_t test_svqdecp_n_u32_b16(uint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_u32,_b16,)(op, pg);
 }
@@ -190,7 +198,7 @@ uint32_t test_svqdecp_n_u32_b16(uint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecp.n32.nxv4i1(i32 [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svqdecp_n_u32_b32(uint32_t op, svbool_t pg)
+uint32_t test_svqdecp_n_u32_b32(uint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_u32,_b32,)(op, pg);
 }
@@ -207,7 +215,7 @@ uint32_t test_svqdecp_n_u32_b32(uint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecp.n32.nxv2i1(i32 [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svqdecp_n_u32_b64(uint32_t op, svbool_t pg)
+uint32_t test_svqdecp_n_u32_b64(uint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_u32,_b64,)(op, pg);
 }
@@ -222,7 +230,7 @@ uint32_t test_svqdecp_n_u32_b64(uint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecp.n64.nxv16i1(i64 [[OP:%.*]], <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdecp_n_u64_b8(uint64_t op, svbool_t pg)
+uint64_t test_svqdecp_n_u64_b8(uint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_u64,_b8,)(op, pg);
 }
@@ -239,7 +247,7 @@ uint64_t test_svqdecp_n_u64_b8(uint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecp.n64.nxv8i1(i64 [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svqdecp_n_u64_b16(uint64_t op, svbool_t pg)
+uint64_t test_svqdecp_n_u64_b16(uint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_u64,_b16,)(op, pg);
 }
@@ -256,7 +264,7 @@ uint64_t test_svqdecp_n_u64_b16(uint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecp.n64.nxv4i1(i64 [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svqdecp_n_u64_b32(uint64_t op, svbool_t pg)
+uint64_t test_svqdecp_n_u64_b32(uint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_u64,_b32,)(op, pg);
 }
@@ -273,7 +281,7 @@ uint64_t test_svqdecp_n_u64_b32(uint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecp.n64.nxv2i1(i64 [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svqdecp_n_u64_b64(uint64_t op, svbool_t pg)
+uint64_t test_svqdecp_n_u64_b64(uint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_n_u64,_b64,)(op, pg);
 }
@@ -290,7 +298,7 @@ uint64_t test_svqdecp_n_u64_b64(uint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqdecp.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svqdecp_s16(svint16_t op, svbool_t pg)
+svint16_t test_svqdecp_s16(svint16_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_s16,,)(op, pg);
 }
@@ -307,7 +315,7 @@ svint16_t test_svqdecp_s16(svint16_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqdecp.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svqdecp_s32(svint32_t op, svbool_t pg)
+svint32_t test_svqdecp_s32(svint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_s32,,)(op, pg);
 }
@@ -324,7 +332,7 @@ svint32_t test_svqdecp_s32(svint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqdecp.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svqdecp_s64(svint64_t op, svbool_t pg)
+svint64_t test_svqdecp_s64(svint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_s64,,)(op, pg);
 }
@@ -341,7 +349,7 @@ svint64_t test_svqdecp_s64(svint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqdecp.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svqdecp_u16(svuint16_t op, svbool_t pg)
+svuint16_t test_svqdecp_u16(svuint16_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_u16,,)(op, pg);
 }
@@ -358,7 +366,7 @@ svuint16_t test_svqdecp_u16(svuint16_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecp.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svqdecp_u32(svuint32_t op, svbool_t pg)
+svuint32_t test_svqdecp_u32(svuint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_u32,,)(op, pg);
 }
@@ -375,7 +383,7 @@ svuint32_t test_svqdecp_u32(svuint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqdecp.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svqdecp_u64(svuint64_t op, svbool_t pg)
+svuint64_t test_svqdecp_u64(svuint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecp,_u64,,)(op, pg);
 }
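
The svqdecp tests differ from the element-count forms above in that the
decrement amount is the number of active elements in a governing predicate
rather than a vector-length pattern, which is why each _bN variant first
narrows the 16 x i1 svbool_t to the element-width predicate ([[TMP0]] in the
checks) before the intrinsic call. A sketch of typical use, with illustrative
names that are not part of the patch:

    // Decrement a counter by the number of active 32-bit lanes in pg,
    // saturating at zero; streaming-compatible under +sme.
    #include <arm_sve.h>

    uint32_t consume(uint32_t remaining, svbool_t pg) __arm_streaming {
      return svqdecp_n_u32_b32(remaining, pg);
    }
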
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecw.c
index 740f48f4fa0d2..0e0d184791e15 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qdecw.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecw.n32(i32 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecw_n_s32(int32_t op)
+int32_t test_svqdecw_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw,_n_s32,,)(op, 1);
 }
@@ -39,7 +47,7 @@ int32_t test_svqdecw_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecw.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecw_n_s32_1(int32_t op)
+int32_t test_svqdecw_n_s32_1(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw,_n_s32,,)(op, 16);
 }
@@ -54,7 +62,7 @@ int32_t test_svqdecw_n_s32_1(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecw.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdecw_n_s64(int64_t op)
+int64_t test_svqdecw_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw,_n_s64,,)(op, 1);
 }
@@ -69,7 +77,7 @@ int64_t test_svqdecw_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecw.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdecw_n_u32(uint32_t op)
+uint32_t test_svqdecw_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw,_n_u32,,)(op, 16);
 }
@@ -84,7 +92,7 @@ uint32_t test_svqdecw_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecw.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdecw_n_u64(uint64_t op)
+uint64_t test_svqdecw_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw,_n_u64,,)(op, 1);
 }
@@ -99,7 +107,7 @@ uint64_t test_svqdecw_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqdecw.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqdecw_pat_n_s32(int32_t op)
+int32_t test_svqdecw_pat_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw_pat,_n_s32,,)(op, SV_ALL, 16);
 }
@@ -114,7 +122,7 @@ int32_t test_svqdecw_pat_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqdecw.n64(i64 [[OP:%.*]], i32 0, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqdecw_pat_n_s64(int64_t op)
+int64_t test_svqdecw_pat_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw_pat,_n_s64,,)(op, SV_POW2, 1);
 }
@@ -129,7 +137,7 @@ int64_t test_svqdecw_pat_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqdecw.n32(i32 [[OP:%.*]], i32 1, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqdecw_pat_n_u32(uint32_t op)
+uint32_t test_svqdecw_pat_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw_pat,_n_u32,,)(op, SV_VL1, 16);
 }
@@ -144,7 +152,7 @@ uint32_t test_svqdecw_pat_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqdecw.n64(i64 [[OP:%.*]], i32 2, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqdecw_pat_n_u64(uint64_t op)
+uint64_t test_svqdecw_pat_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw_pat,_n_u64,,)(op, SV_VL2, 1);
 }
@@ -159,7 +167,7 @@ uint64_t test_svqdecw_pat_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqdecw.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svqdecw_s32(svint32_t op)
+svint32_t test_svqdecw_s32(svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw,_s32,,)(op, 16);
 }
@@ -174,7 +182,7 @@ svint32_t test_svqdecw_s32(svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecw.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svqdecw_u32(svuint32_t op)
+svuint32_t test_svqdecw_u32(svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw,_u32,,)(op, 1);
 }
@@ -189,7 +197,7 @@ svuint32_t test_svqdecw_u32(svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqdecw.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], i32 3, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svqdecw_pat_s32(svint32_t op)
+svint32_t test_svqdecw_pat_s32(svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw_pat,_s32,,)(op, SV_VL3, 16);
 }
@@ -204,7 +212,7 @@ svint32_t test_svqdecw_pat_s32(svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqdecw.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], i32 4, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svqdecw_pat_u32(svuint32_t op)
+svuint32_t test_svqdecw_pat_u32(svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqdecw_pat,_u32,,)(op, SV_VL4, 1);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincb.c
index 30f9d9f9423e5..f2b69e7436f30 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincb.n32(i32 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincb_n_s32(int32_t op)
+int32_t test_svqincb_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb,_n_s32,,)(op, 1);
 }
@@ -39,7 +47,7 @@ int32_t test_svqincb_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincb.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincb_n_s32_1(int32_t op)
+int32_t test_svqincb_n_s32_1(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb,_n_s32,,)(op, 16);
 }
@@ -54,7 +62,7 @@ int32_t test_svqincb_n_s32_1(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqincb.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqincb_n_s64(int64_t op)
+int64_t test_svqincb_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb,_n_s64,,)(op, 1);
 }
@@ -69,7 +77,7 @@ int64_t test_svqincb_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqincb.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqincb_n_u32(uint32_t op)
+uint32_t test_svqincb_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb,_n_u32,,)(op, 16);
 }
@@ -84,7 +92,7 @@ uint32_t test_svqincb_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqincb.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqincb_n_u64(uint64_t op)
+uint64_t test_svqincb_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb,_n_u64,,)(op, 1);
 }
@@ -99,7 +107,7 @@ uint64_t test_svqincb_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincb.n32(i32 [[OP:%.*]], i32 5, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincb_pat_n_s32(int32_t op)
+int32_t test_svqincb_pat_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb_pat,_n_s32,,)(op, SV_VL5, 16);
 }
@@ -114,7 +122,7 @@ int32_t test_svqincb_pat_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqincb.n64(i64 [[OP:%.*]], i32 6, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqincb_pat_n_s64(int64_t op)
+int64_t test_svqincb_pat_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb_pat,_n_s64,,)(op, SV_VL6, 1);
 }
@@ -129,7 +137,7 @@ int64_t test_svqincb_pat_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqincb.n32(i32 [[OP:%.*]], i32 7, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqincb_pat_n_u32(uint32_t op)
+uint32_t test_svqincb_pat_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb_pat,_n_u32,,)(op, SV_VL7, 16);
 }
@@ -144,7 +152,7 @@ uint32_t test_svqincb_pat_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqincb.n64(i64 [[OP:%.*]], i32 8, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqincb_pat_n_u64(uint64_t op)
+uint64_t test_svqincb_pat_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincb_pat,_n_u64,,)(op, SV_VL8, 1);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincd.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincd.c
index 706fbbd9e167d..27d0964fae5d2 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincd.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincd.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincd.n32(i32 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincd_n_s32(int32_t op)
+int32_t test_svqincd_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd,_n_s32,,)(op, 1);
 }
@@ -39,7 +47,7 @@ int32_t test_svqincd_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincd.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincd_n_s32_1(int32_t op)
+int32_t test_svqincd_n_s32_1(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd,_n_s32,,)(op, 16);
 }
@@ -54,7 +62,7 @@ int32_t test_svqincd_n_s32_1(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqincd.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqincd_n_s64(int64_t op)
+int64_t test_svqincd_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd,_n_s64,,)(op, 1);
 }
@@ -69,7 +77,7 @@ int64_t test_svqincd_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqincd.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqincd_n_u32(uint32_t op)
+uint32_t test_svqincd_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd,_n_u32,,)(op, 16);
 }
@@ -84,7 +92,7 @@ uint32_t test_svqincd_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqincd.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqincd_n_u64(uint64_t op)
+uint64_t test_svqincd_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd,_n_u64,,)(op, 1);
 }
@@ -99,7 +107,7 @@ uint64_t test_svqincd_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincd.n32(i32 [[OP:%.*]], i32 9, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincd_pat_n_s32(int32_t op)
+int32_t test_svqincd_pat_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd_pat,_n_s32,,)(op, SV_VL16, 16);
 }
@@ -114,7 +122,7 @@ int32_t test_svqincd_pat_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqincd.n64(i64 [[OP:%.*]], i32 10, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqincd_pat_n_s64(int64_t op)
+int64_t test_svqincd_pat_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd_pat,_n_s64,,)(op, SV_VL32, 1);
 }
@@ -129,7 +137,7 @@ int64_t test_svqincd_pat_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqincd.n32(i32 [[OP:%.*]], i32 11, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqincd_pat_n_u32(uint32_t op)
+uint32_t test_svqincd_pat_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd_pat,_n_u32,,)(op, SV_VL64, 16);
 }
@@ -144,7 +152,7 @@ uint32_t test_svqincd_pat_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqincd.n64(i64 [[OP:%.*]], i32 12, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqincd_pat_n_u64(uint64_t op)
+uint64_t test_svqincd_pat_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd_pat,_n_u64,,)(op, SV_VL128, 1);
 }
@@ -159,7 +167,7 @@ uint64_t test_svqincd_pat_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqincd.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svqincd_s64(svint64_t op)
+svint64_t test_svqincd_s64(svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd,_s64,,)(op, 16);
 }
@@ -174,7 +182,7 @@ svint64_t test_svqincd_s64(svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqincd.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svqincd_u64(svuint64_t op)
+svuint64_t test_svqincd_u64(svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd,_u64,,)(op, 1);
 }
@@ -189,7 +197,7 @@ svuint64_t test_svqincd_u64(svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqincd.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], i32 13, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svqincd_pat_s64(svint64_t op)
+svint64_t test_svqincd_pat_s64(svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd_pat,_s64,,)(op, SV_VL256, 16);
 }
@@ -204,7 +212,7 @@ svint64_t test_svqincd_pat_s64(svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqincd.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], i32 29, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svqincd_pat_u64(svuint64_t op)
+svuint64_t test_svqincd_pat_u64(svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincd_pat,_u64,,)(op, SV_MUL4, 1);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qinch.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qinch.c
index 5fdb898d63506..05471619ad7e6 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qinch.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qinch.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqinch.n32(i32 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqinch_n_s32(int32_t op)
+int32_t test_svqinch_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch,_n_s32,,)(op, 1);
 }
@@ -39,7 +47,7 @@ int32_t test_svqinch_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqinch.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqinch_n_s32_1(int32_t op)
+int32_t test_svqinch_n_s32_1(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch,_n_s32,,)(op, 16);
 }
@@ -54,7 +62,7 @@ int32_t test_svqinch_n_s32_1(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqinch.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqinch_n_s64(int64_t op)
+int64_t test_svqinch_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch,_n_s64,,)(op, 1);
 }
@@ -69,7 +77,7 @@ int64_t test_svqinch_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqinch.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqinch_n_u32(uint32_t op)
+uint32_t test_svqinch_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch,_n_u32,,)(op, 16);
 }
@@ -84,7 +92,7 @@ uint32_t test_svqinch_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqinch.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqinch_n_u64(uint64_t op)
+uint64_t test_svqinch_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch,_n_u64,,)(op, 1);
 }
@@ -99,7 +107,7 @@ uint64_t test_svqinch_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqinch.n32(i32 [[OP:%.*]], i32 30, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqinch_pat_n_s32(int32_t op)
+int32_t test_svqinch_pat_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch_pat,_n_s32,,)(op, SV_MUL3, 16);
 }
@@ -114,7 +122,7 @@ int32_t test_svqinch_pat_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqinch.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqinch_pat_n_s64(int64_t op)
+int64_t test_svqinch_pat_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch_pat,_n_s64,,)(op, SV_ALL, 1);
 }
@@ -129,7 +137,7 @@ int64_t test_svqinch_pat_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqinch.n32(i32 [[OP:%.*]], i32 0, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqinch_pat_n_u32(uint32_t op)
+uint32_t test_svqinch_pat_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch_pat,_n_u32,,)(op, SV_POW2, 16);
 }
@@ -144,7 +152,7 @@ uint32_t test_svqinch_pat_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqinch.n64(i64 [[OP:%.*]], i32 1, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqinch_pat_n_u64(uint64_t op)
+uint64_t test_svqinch_pat_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch_pat,_n_u64,,)(op, SV_VL1, 1);
 }
@@ -159,7 +167,7 @@ uint64_t test_svqinch_pat_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqinch.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svqinch_s16(svint16_t op)
+svint16_t test_svqinch_s16(svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch,_s16,,)(op, 16);
 }
@@ -174,7 +182,7 @@ svint16_t test_svqinch_s16(svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqinch.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svqinch_u16(svuint16_t op)
+svuint16_t test_svqinch_u16(svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch,_u16,,)(op, 1);
 }
@@ -189,7 +197,7 @@ svuint16_t test_svqinch_u16(svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqinch.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], i32 2, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svqinch_pat_s16(svint16_t op)
+svint16_t test_svqinch_pat_s16(svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch_pat,_s16,,)(op, SV_VL2, 16);
 }
@@ -204,7 +212,7 @@ svint16_t test_svqinch_pat_s16(svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqinch.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], i32 3, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svqinch_pat_u16(svuint16_t op)
+svuint16_t test_svqinch_pat_u16(svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqinch_pat,_u16,,)(op, SV_VL3, 1);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincp.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincp.c
index a0f0991302a4c..6f48aa1972b85 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincp.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincp.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincp.n32.nxv16i1(i32 [[OP:%.*]], <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincp_n_s32_b8(int32_t op, svbool_t pg)
+int32_t test_svqincp_n_s32_b8(int32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_s32,_b8,)(op, pg);
 }
@@ -41,7 +49,7 @@ int32_t test_svqincp_n_s32_b8(int32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.sqincp.n32.nxv8i1(i32 [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svqincp_n_s32_b16(int32_t op, svbool_t pg)
+int32_t test_svqincp_n_s32_b16(int32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_s32,_b16,)(op, pg);
 }
@@ -58,7 +66,7 @@ int32_t test_svqincp_n_s32_b16(int32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.sqincp.n32.nxv4i1(i32 [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svqincp_n_s32_b32(int32_t op, svbool_t pg)
+int32_t test_svqincp_n_s32_b32(int32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_s32,_b32,)(op, pg);
 }
@@ -75,7 +83,7 @@ int32_t test_svqincp_n_s32_b32(int32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.sqincp.n32.nxv2i1(i32 [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-int32_t test_svqincp_n_s32_b64(int32_t op, svbool_t pg)
+int32_t test_svqincp_n_s32_b64(int32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_s32,_b64,)(op, pg);
 }
@@ -90,7 +98,7 @@ int32_t test_svqincp_n_s32_b64(int32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqincp.n64.nxv16i1(i64 [[OP:%.*]], <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqincp_n_s64_b8(int64_t op, svbool_t pg)
+int64_t test_svqincp_n_s64_b8(int64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_s64,_b8,)(op, pg);
 }
@@ -107,7 +115,7 @@ int64_t test_svqincp_n_s64_b8(int64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.sqincp.n64.nxv8i1(i64 [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svqincp_n_s64_b16(int64_t op, svbool_t pg)
+int64_t test_svqincp_n_s64_b16(int64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_s64,_b16,)(op, pg);
 }
@@ -124,7 +132,7 @@ int64_t test_svqincp_n_s64_b16(int64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.sqincp.n64.nxv4i1(i64 [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svqincp_n_s64_b32(int64_t op, svbool_t pg)
+int64_t test_svqincp_n_s64_b32(int64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_s64,_b32,)(op, pg);
 }
@@ -141,7 +149,7 @@ int64_t test_svqincp_n_s64_b32(int64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.sqincp.n64.nxv2i1(i64 [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-int64_t test_svqincp_n_s64_b64(int64_t op, svbool_t pg)
+int64_t test_svqincp_n_s64_b64(int64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_s64,_b64,)(op, pg);
 }
@@ -156,7 +164,7 @@ int64_t test_svqincp_n_s64_b64(int64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqincp.n32.nxv16i1(i32 [[OP:%.*]], <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqincp_n_u32_b8(uint32_t op, svbool_t pg)
+uint32_t test_svqincp_n_u32_b8(uint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_u32,_b8,)(op, pg);
 }
@@ -173,7 +181,7 @@ uint32_t test_svqincp_n_u32_b8(uint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.uqincp.n32.nxv8i1(i32 [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svqincp_n_u32_b16(uint32_t op, svbool_t pg)
+uint32_t test_svqincp_n_u32_b16(uint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_u32,_b16,)(op, pg);
 }
@@ -190,7 +198,7 @@ uint32_t test_svqincp_n_u32_b16(uint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.uqincp.n32.nxv4i1(i32 [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svqincp_n_u32_b32(uint32_t op, svbool_t pg)
+uint32_t test_svqincp_n_u32_b32(uint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_u32,_b32,)(op, pg);
 }
@@ -207,7 +215,7 @@ uint32_t test_svqincp_n_u32_b32(uint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i32 @llvm.aarch64.sve.uqincp.n32.nxv2i1(i32 [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i32 [[TMP1]]
 //
-uint32_t test_svqincp_n_u32_b64(uint32_t op, svbool_t pg)
+uint32_t test_svqincp_n_u32_b64(uint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_u32,_b64,)(op, pg);
 }
@@ -222,7 +230,7 @@ uint32_t test_svqincp_n_u32_b64(uint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqincp.n64.nxv16i1(i64 [[OP:%.*]], <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqincp_n_u64_b8(uint64_t op, svbool_t pg)
+uint64_t test_svqincp_n_u64_b8(uint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_u64,_b8,)(op, pg);
 }
@@ -239,7 +247,7 @@ uint64_t test_svqincp_n_u64_b8(uint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uqincp.n64.nxv8i1(i64 [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svqincp_n_u64_b16(uint64_t op, svbool_t pg)
+uint64_t test_svqincp_n_u64_b16(uint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_u64,_b16,)(op, pg);
 }
@@ -256,7 +264,7 @@ uint64_t test_svqincp_n_u64_b16(uint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uqincp.n64.nxv4i1(i64 [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svqincp_n_u64_b32(uint64_t op, svbool_t pg)
+uint64_t test_svqincp_n_u64_b32(uint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_u64,_b32,)(op, pg);
 }
@@ -273,7 +281,7 @@ uint64_t test_svqincp_n_u64_b32(uint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.aarch64.sve.uqincp.n64.nxv2i1(i64 [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret i64 [[TMP1]]
 //
-uint64_t test_svqincp_n_u64_b64(uint64_t op, svbool_t pg)
+uint64_t test_svqincp_n_u64_b64(uint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_n_u64,_b64,)(op, pg);
 }
@@ -290,7 +298,7 @@ uint64_t test_svqincp_n_u64_b64(uint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqincp.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svqincp_s16(svint16_t op, svbool_t pg)
+svint16_t test_svqincp_s16(svint16_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_s16,,)(op, pg);
 }
@@ -307,7 +315,7 @@ svint16_t test_svqincp_s16(svint16_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqincp.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svqincp_s32(svint32_t op, svbool_t pg)
+svint32_t test_svqincp_s32(svint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_s32,,)(op, pg);
 }
@@ -324,7 +332,7 @@ svint32_t test_svqincp_s32(svint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqincp.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svqincp_s64(svint64_t op, svbool_t pg)
+svint64_t test_svqincp_s64(svint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_s64,,)(op, pg);
 }
@@ -341,7 +349,7 @@ svint64_t test_svqincp_s64(svint64_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqincp.nxv8i16(<vscale x 8 x i16> [[OP:%.*]], <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svqincp_u16(svuint16_t op, svbool_t pg)
+svuint16_t test_svqincp_u16(svuint16_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_u16,,)(op, pg);
 }
@@ -358,7 +366,7 @@ svuint16_t test_svqincp_u16(svuint16_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqincp.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svqincp_u32(svuint32_t op, svbool_t pg)
+svuint32_t test_svqincp_u32(svuint32_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_u32,,)(op, pg);
 }
@@ -375,7 +383,7 @@ svuint32_t test_svqincp_u32(svuint32_t op, svbool_t pg)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqincp.nxv2i64(<vscale x 2 x i64> [[OP:%.*]], <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svqincp_u64(svuint64_t op, svbool_t pg)
+svuint64_t test_svqincp_u64(svuint64_t op, svbool_t pg) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincp,_u64,,)(op, pg);
 }
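
For reference, a minimal standalone sketch (not part of the patch; function name hypothetical) of the mode-dispatch idiom these updated tests rely on: when a file is compiled with -target-feature +sme and no +sve, clang predefines __ARM_FEATURE_SME, so MODE_ATTR expands to __arm_streaming and the streaming-compatible svqincp intrinsic is accepted inside the streaming function; with +sve the attribute expands to nothing and the test compiles exactly as before.

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
#endif

/* Saturating: acc += number of active predicate lanes. Legal in both a
   non-streaming (+sve) and a streaming (+sme) compilation. */
int32_t saturating_count(int32_t acc, svbool_t pg) MODE_ATTR {
  return svqincp_n_s32_b8(acc, pg);
}
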
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincw.c
index 664328a826878..2cb0be931db8f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qincw.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincw.n32(i32 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincw_n_s32(int32_t op)
+int32_t test_svqincw_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw,_n_s32,,)(op, 1);
 }
@@ -39,7 +47,7 @@ int32_t test_svqincw_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincw.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincw_n_s32_1(int32_t op)
+int32_t test_svqincw_n_s32_1(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw,_n_s32,,)(op, 16);
 }
@@ -54,7 +62,7 @@ int32_t test_svqincw_n_s32_1(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqincw.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqincw_n_s64(int64_t op)
+int64_t test_svqincw_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw,_n_s64,,)(op, 1);
 }
@@ -69,7 +77,7 @@ int64_t test_svqincw_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqincw.n32(i32 [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqincw_n_u32(uint32_t op)
+uint32_t test_svqincw_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw,_n_u32,,)(op, 16);
 }
@@ -84,7 +92,7 @@ uint32_t test_svqincw_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqincw.n64(i64 [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqincw_n_u64(uint64_t op)
+uint64_t test_svqincw_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw,_n_u64,,)(op, 1);
 }
@@ -99,7 +107,7 @@ uint64_t test_svqincw_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.sqincw.n32(i32 [[OP:%.*]], i32 4, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-int32_t test_svqincw_pat_n_s32(int32_t op)
+int32_t test_svqincw_pat_n_s32(int32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw_pat,_n_s32,,)(op, SV_VL4, 16);
 }
@@ -114,7 +122,7 @@ int32_t test_svqincw_pat_n_s32(int32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.sqincw.n64(i64 [[OP:%.*]], i32 5, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-int64_t test_svqincw_pat_n_s64(int64_t op)
+int64_t test_svqincw_pat_n_s64(int64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw_pat,_n_s64,,)(op, SV_VL5, 1);
 }
@@ -129,7 +137,7 @@ int64_t test_svqincw_pat_n_s64(int64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.aarch64.sve.uqincw.n32(i32 [[OP:%.*]], i32 6, i32 16)
 // CPP-CHECK-NEXT:    ret i32 [[TMP0]]
 //
-uint32_t test_svqincw_pat_n_u32(uint32_t op)
+uint32_t test_svqincw_pat_n_u32(uint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw_pat,_n_u32,,)(op, SV_VL6, 16);
 }
@@ -144,7 +152,7 @@ uint32_t test_svqincw_pat_n_u32(uint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sve.uqincw.n64(i64 [[OP:%.*]], i32 7, i32 1)
 // CPP-CHECK-NEXT:    ret i64 [[TMP0]]
 //
-uint64_t test_svqincw_pat_n_u64(uint64_t op)
+uint64_t test_svqincw_pat_n_u64(uint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw_pat,_n_u64,,)(op, SV_VL7, 1);
 }
@@ -159,7 +167,7 @@ uint64_t test_svqincw_pat_n_u64(uint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqincw.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], i32 31, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svqincw_s32(svint32_t op)
+svint32_t test_svqincw_s32(svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw,_s32,,)(op, 16);
 }
@@ -174,7 +182,7 @@ svint32_t test_svqincw_s32(svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqincw.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], i32 31, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svqincw_u32(svuint32_t op)
+svuint32_t test_svqincw_u32(svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw,_u32,,)(op, 1);
 }
@@ -189,7 +197,7 @@ svuint32_t test_svqincw_u32(svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqincw.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], i32 8, i32 16)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svqincw_pat_s32(svint32_t op)
+svint32_t test_svqincw_pat_s32(svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw_pat,_s32,,)(op, SV_VL8, 16);
 }
@@ -204,7 +212,7 @@ svint32_t test_svqincw_pat_s32(svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqincw.nxv4i32(<vscale x 4 x i32> [[OP:%.*]], i32 9, i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svqincw_pat_u32(svuint32_t op)
+svuint32_t test_svqincw_pat_u32(svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqincw_pat,_u32,,)(op, SV_VL16, 1);
 }
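
The _pat forms above increment by a multiple of the element count that a predicate pattern selects. A minimal standalone sketch mirroring test_svqincw_pat_n_s32 (not part of the patch; helper name hypothetical):

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
#endif

/* acc += 16 * (number of 32-bit elements covered by the VL4 pattern),
   with signed saturation. */
int32_t bump_by_vl4(int32_t acc) MODE_ATTR {
  return svqincw_pat_n_s32(acc, SV_VL4, 16);
}
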
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qsub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qsub.c
index d5d413ebdff63..7ae68897643d0 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qsub.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_qsub.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svqsub_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svqsub_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svqsub_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svqsub_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svqsub_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svqsub_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svqsub_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svqsub_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svqsub_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svqsub_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svqsub_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svqsub_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svqsub_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svqsub_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svqsub_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svqsub_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svqsub_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svqsub_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svqsub_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svqsub_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svqsub_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svqsub_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svqsub_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_u64,,)(op1, op2);
 }
@@ -148,7 +156,7 @@ svuint64_t test_svqsub_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svqsub_n_s8(svint8_t op1, int8_t op2)
+svint8_t test_svqsub_n_s8(svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_n_s8,,)(op1, op2);
 }
@@ -167,7 +175,7 @@ svint8_t test_svqsub_n_s8(svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svqsub_n_s16(svint16_t op1, int16_t op2)
+svint16_t test_svqsub_n_s16(svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_n_s16,,)(op1, op2);
 }
@@ -186,7 +194,7 @@ svint16_t test_svqsub_n_s16(svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svqsub_n_s32(svint32_t op1, int32_t op2)
+svint32_t test_svqsub_n_s32(svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_n_s32,,)(op1, op2);
 }
@@ -205,7 +213,7 @@ svint32_t test_svqsub_n_s32(svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svqsub_n_s64(svint64_t op1, int64_t op2)
+svint64_t test_svqsub_n_s64(svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_n_s64,,)(op1, op2);
 }
@@ -224,7 +232,7 @@ svint64_t test_svqsub_n_s64(svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.x.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svqsub_n_u8(svuint8_t op1, uint8_t op2)
+svuint8_t test_svqsub_n_u8(svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_n_u8,,)(op1, op2);
 }
@@ -243,7 +251,7 @@ svuint8_t test_svqsub_n_u8(svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svqsub_n_u16(svuint16_t op1, uint16_t op2)
+svuint16_t test_svqsub_n_u16(svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_n_u16,,)(op1, op2);
 }
@@ -262,7 +270,7 @@ svuint16_t test_svqsub_n_u16(svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svqsub_n_u32(svuint32_t op1, uint32_t op2)
+svuint32_t test_svqsub_n_u32(svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_n_u32,,)(op1, op2);
 }
@@ -281,7 +289,7 @@ svuint32_t test_svqsub_n_u32(svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svqsub_n_u64(svuint64_t op1, uint64_t op2)
+svuint64_t test_svqsub_n_u64(svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svqsub,_n_u64,,)(op1, op2);
 }
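
The RUN configurations in these files also cover both spellings of each intrinsic: the explicitly suffixed form and the overloaded form that SVE_ACLE_FUNC collapses to when SVE_OVERLOADED_FORMS is defined. A minimal illustration, assuming an +sve compilation for brevity (not part of the patch; function names hypothetical):

#include <arm_sve.h>

/* Both calls lower to the same @llvm.aarch64.sve.sqsub.x intrinsic. */
svint8_t sub_explicit(svint8_t a, svint8_t b) { return svqsub_s8(a, b); }
svint8_t sub_overload(svint8_t a, svint8_t b) { return svqsub(a, b); }
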
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rbit.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rbit.c
index 997d53755ab27..5b1b91e7ecfde 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rbit.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rbit.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svrbit_s8_z(svbool_t pg, svint8_t op)
+svint8_t test_svrbit_s8_z(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s8,_z,)(pg, op);
 }
@@ -41,7 +49,7 @@ svint8_t test_svrbit_s8_z(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrbit_s16_z(svbool_t pg, svint16_t op)
+svint16_t test_svrbit_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s16,_z,)(pg, op);
 }
@@ -58,7 +66,7 @@ svint16_t test_svrbit_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrbit_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svrbit_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s32,_z,)(pg, op);
 }
@@ -75,7 +83,7 @@ svint32_t test_svrbit_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrbit_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svrbit_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s64,_z,)(pg, op);
 }
@@ -90,7 +98,7 @@ svint64_t test_svrbit_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svrbit_u8_z(svbool_t pg, svuint8_t op)
+svuint8_t test_svrbit_u8_z(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u8,_z,)(pg, op);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svrbit_u8_z(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrbit_u16_z(svbool_t pg, svuint16_t op)
+svuint16_t test_svrbit_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u16,_z,)(pg, op);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svrbit_u16_z(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrbit_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svrbit_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u32,_z,)(pg, op);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svrbit_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrbit_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svrbit_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u64,_z,)(pg, op);
 }
@@ -156,7 +164,7 @@ svuint64_t test_svrbit_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svrbit_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
+svint8_t test_svrbit_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s8,_m,)(inactive, pg, op);
 }
@@ -173,7 +181,7 @@ svint8_t test_svrbit_s8_m(svint8_t inactive, svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrbit_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
+svint16_t test_svrbit_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s16,_m,)(inactive, pg, op);
 }
@@ -190,7 +198,7 @@ svint16_t test_svrbit_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrbit_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svrbit_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s32,_m,)(inactive, pg, op);
 }
@@ -207,7 +215,7 @@ svint32_t test_svrbit_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrbit_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svrbit_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s64,_m,)(inactive, pg, op);
 }
@@ -222,7 +230,7 @@ svint64_t test_svrbit_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> [[INACTIVE:%.*]], <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svrbit_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
+svuint8_t test_svrbit_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u8,_m,)(inactive, pg, op);
 }
@@ -239,7 +247,7 @@ svuint8_t test_svrbit_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrbit_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
+svuint16_t test_svrbit_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u16,_m,)(inactive, pg, op);
 }
@@ -256,7 +264,7 @@ svuint16_t test_svrbit_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrbit_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svrbit_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u32,_m,)(inactive, pg, op);
 }
@@ -273,7 +281,7 @@ svuint32_t test_svrbit_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrbit_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svrbit_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u64,_m,)(inactive, pg, op);
 }
@@ -288,7 +296,7 @@ svuint64_t test_svrbit_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svrbit_s8_x(svbool_t pg, svint8_t op)
+svint8_t test_svrbit_s8_x(svbool_t pg, svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s8,_x,)(pg, op);
 }
@@ -305,7 +313,7 @@ svint8_t test_svrbit_s8_x(svbool_t pg, svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrbit_s16_x(svbool_t pg, svint16_t op)
+svint16_t test_svrbit_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s16,_x,)(pg, op);
 }
@@ -322,7 +330,7 @@ svint16_t test_svrbit_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrbit_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svrbit_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s32,_x,)(pg, op);
 }
@@ -339,7 +347,7 @@ svint32_t test_svrbit_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrbit_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svrbit_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_s64,_x,)(pg, op);
 }
@@ -354,7 +362,7 @@ svint64_t test_svrbit_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rbit.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svrbit_u8_x(svbool_t pg, svuint8_t op)
+svuint8_t test_svrbit_u8_x(svbool_t pg, svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u8,_x,)(pg, op);
 }
@@ -371,7 +379,7 @@ svuint8_t test_svrbit_u8_x(svbool_t pg, svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rbit.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrbit_u16_x(svbool_t pg, svuint16_t op)
+svuint16_t test_svrbit_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u16,_x,)(pg, op);
 }
@@ -388,7 +396,7 @@ svuint16_t test_svrbit_u16_x(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrbit_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svrbit_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u32,_x,)(pg, op);
 }
@@ -405,7 +413,7 @@ svuint32_t test_svrbit_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rbit.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrbit_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svrbit_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrbit,_u64,_x,)(pg, op);
 }
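
The _z/_m/_x suffixes exercised above differ only in how inactive lanes are produced, which shows up as the first operand of each rbit call in the IR checks: zeroinitializer, the inactive argument, or undef. A minimal sketch of the three forms (not part of the patch; function names hypothetical):

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
#endif

svuint8_t bits_z(svbool_t pg, svuint8_t x) MODE_ATTR {
  return svrbit_u8_z(pg, x);            /* inactive lanes zeroed */
}
svuint8_t bits_m(svuint8_t inactive, svbool_t pg, svuint8_t x) MODE_ATTR {
  return svrbit_u8_m(inactive, pg, x);  /* inactive lanes from 'inactive' */
}
svuint8_t bits_x(svbool_t pg, svuint8_t x) MODE_ATTR {
  return svrbit_u8_x(pg, x);            /* inactive lanes unspecified */
}
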
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpe.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpe.c
index 344ea90299dd8..067512d27d57f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpe.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpe.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frecpe.x.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svrecpe_f16(svfloat16_t op)
+svfloat16_t test_svrecpe_f16(svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpe,_f16,,)(op);
 }
@@ -39,7 +47,7 @@ svfloat16_t test_svrecpe_f16(svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frecpe.x.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svrecpe_f32(svfloat32_t op)
+svfloat32_t test_svrecpe_f32(svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpe,_f32,,)(op);
 }
@@ -54,7 +62,7 @@ svfloat32_t test_svrecpe_f32(svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frecpe.x.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svrecpe_f64(svfloat64_t op)
+svfloat64_t test_svrecpe_f64(svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpe,_f64,,)(op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recps.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recps.c
index 7be5b15674a06..e1a5743d67237 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recps.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recps.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svrecps_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svrecps_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecps,_f16,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svfloat16_t test_svrecps_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svrecps_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svrecps_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecps,_f32,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svfloat32_t test_svrecps_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svrecps_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svrecps_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecps,_f64,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpx.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpx.c
index bfccfb840c47a..623bdc89a20cc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpx.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_recpx.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frecpx.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrecpx_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrecpx_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svrecpx_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrecpx_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrecpx_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svrecpx_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frecpx.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrecpx_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrecpx_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svrecpx_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frecpx.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrecpx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrecpx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svrecpx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrecpx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrecpx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svrecpx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frecpx.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrecpx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrecpx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svrecpx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frecpx.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrecpx_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrecpx_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svrecpx_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrecpx_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrecpx_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svrecpx_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frecpx.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrecpx_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrecpx_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrecpx,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev-bfloat.c
index ff436a62f807f..9b3e813fa9694 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.rev.nxv8bf16(<vscale x 8 x bfloat> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svrev_bf16(svbfloat16_t op)
+svbfloat16_t test_svrev_bf16(svbfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_bf16,,)(op);
 }
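
Unlike the other files, the bfloat16 test needs -target-feature +bf16 on top of +sve or +sme, as reflected in its RUN lines. A minimal sketch (not part of the patch; function name hypothetical):

#include <arm_sve.h>

#if defined __ARM_FEATURE_SME
#define MODE_ATTR __arm_streaming
#else
#define MODE_ATTR
#endif

/* Reverse the element order of a scalable bfloat16 vector; requires the
   +bf16 target feature in addition to +sve or +sme. */
svbfloat16_t reverse_bf16(svbfloat16_t v) MODE_ATTR {
  return svrev_bf16(v);
}
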
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c
index 0bd7fe5e7d226..3c0ae7df79644 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rev.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svrev_s8(svint8_t op)
+svint8_t test_svrev_s8(svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_s8,,)(op);
 }
@@ -39,7 +47,7 @@ svint8_t test_svrev_s8(svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svrev_s16(svint16_t op)
+svint16_t test_svrev_s16(svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_s16,,)(op);
 }
@@ -54,7 +62,7 @@ svint16_t test_svrev_s16(svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svrev_s32(svint32_t op)
+svint32_t test_svrev_s32(svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_s32,,)(op);
 }
@@ -69,7 +77,7 @@ svint32_t test_svrev_s32(svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svrev_s64(svint64_t op)
+svint64_t test_svrev_s64(svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_s64,,)(op);
 }
@@ -84,7 +92,7 @@ svint64_t test_svrev_s64(svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.rev.nxv16i8(<vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svrev_u8(svuint8_t op)
+svuint8_t test_svrev_u8(svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_u8,,)(op);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svrev_u8(svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.rev.nxv8i16(<vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svrev_u16(svuint16_t op)
+svuint16_t test_svrev_u16(svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_u16,,)(op);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svrev_u16(svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rev.nxv4i32(<vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svrev_u32(svuint32_t op)
+svuint32_t test_svrev_u32(svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_u32,,)(op);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svrev_u32(svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.rev.nxv2i64(<vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svrev_u64(svuint64_t op)
+svuint64_t test_svrev_u64(svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_u64,,)(op);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svrev_u64(svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.rev.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svrev_f16(svfloat16_t op)
+svfloat16_t test_svrev_f16(svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_f16,,)(op);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svrev_f16(svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.rev.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svrev_f32(svfloat32_t op)
+svfloat32_t test_svrev_f32(svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_f32,,)(op);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svrev_f32(svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.rev.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svrev_f64(svfloat64_t op)
+svfloat64_t test_svrev_f64(svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrev,_f64,,)(op);
 }
@@ -189,7 +197,7 @@ svfloat64_t test_svrev_f64(svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.nxv16i1(<vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svrev_b8(svbool_t op)
+svbool_t test_svrev_b8(svbool_t op) MODE_ATTR
 {
   return svrev_b8(op);
 }
@@ -204,7 +212,7 @@ svbool_t test_svrev_b8(svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b16(<vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svrev_b16(svbool_t op)
+svbool_t test_svrev_b16(svbool_t op) MODE_ATTR
 {
   return svrev_b16(op);
 }
@@ -219,7 +227,7 @@ svbool_t test_svrev_b16(svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b32(<vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svrev_b32(svbool_t op)
+svbool_t test_svrev_b32(svbool_t op) MODE_ATTR
 {
   return svrev_b32(op);
 }
@@ -234,7 +242,7 @@ svbool_t test_svrev_b32(svbool_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.rev.b64(<vscale x 16 x i1> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svrev_b64(svbool_t op)
+svbool_t test_svrev_b64(svbool_t op) MODE_ATTR
 {
   return svrev_b64(op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revb.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revb.c
index f5508d2c8d9d6..a6fc93eb5164a 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revb.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revb.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrevb_s16_z(svbool_t pg, svint16_t op)
+svint16_t test_svrevb_s16_z(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svint16_t test_svrevb_s16_z(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevb_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svrevb_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svint32_t test_svrevb_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevb_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svrevb_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svint64_t test_svrevb_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrevb_u16_z(svbool_t pg, svuint16_t op)
+svuint16_t test_svrevb_u16_z(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u16,_z,)(pg, op);
 }
@@ -94,7 +102,7 @@ svuint16_t test_svrevb_u16_z(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevb_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svrevb_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u32,_z,)(pg, op);
 }
@@ -111,7 +119,7 @@ svuint32_t test_svrevb_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevb_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svrevb_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u64,_z,)(pg, op);
 }
@@ -128,7 +136,7 @@ svuint64_t test_svrevb_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrevb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
+svint16_t test_svrevb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s16,_m,)(inactive, pg, op);
 }
@@ -145,7 +153,7 @@ svint16_t test_svrevb_s16_m(svint16_t inactive, svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svrevb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s32,_m,)(inactive, pg, op);
 }
@@ -162,7 +170,7 @@ svint32_t test_svrevb_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svrevb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s64,_m,)(inactive, pg, op);
 }
@@ -179,7 +187,7 @@ svint64_t test_svrevb_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrevb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
+svuint16_t test_svrevb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u16,_m,)(inactive, pg, op);
 }
@@ -196,7 +204,7 @@ svuint16_t test_svrevb_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svrevb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u32,_m,)(inactive, pg, op);
 }
@@ -213,7 +221,7 @@ svuint32_t test_svrevb_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svrevb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u64,_m,)(inactive, pg, op);
 }
@@ -230,7 +238,7 @@ svuint64_t test_svrevb_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svrevb_s16_x(svbool_t pg, svint16_t op)
+svint16_t test_svrevb_s16_x(svbool_t pg, svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s16,_x,)(pg, op);
 }
@@ -247,7 +255,7 @@ svint16_t test_svrevb_s16_x(svbool_t pg, svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevb_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svrevb_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s32,_x,)(pg, op);
 }
@@ -264,7 +272,7 @@ svint32_t test_svrevb_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevb_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svrevb_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_s64,_x,)(pg, op);
 }
@@ -281,7 +289,7 @@ svint64_t test_svrevb_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.revb.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svrevb_u16_x(svbool_t pg, svuint16_t op)
+svuint16_t test_svrevb_u16_x(svbool_t pg, svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u16,_x,)(pg, op);
 }
@@ -298,7 +306,7 @@ svuint16_t test_svrevb_u16_x(svbool_t pg, svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevb_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svrevb_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u32,_x,)(pg, op);
 }
@@ -315,7 +323,7 @@ svuint32_t test_svrevb_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revb.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevb_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svrevb_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevb,_u64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revh.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revh.c
index e1ceeda3423ed..7a21eee46cfc5 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revh.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revh.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevh_s32_z(svbool_t pg, svint32_t op)
+svint32_t test_svrevh_s32_z(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_s32,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svint32_t test_svrevh_s32_z(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevh_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svrevh_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_s64,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svint64_t test_svrevh_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevh_u32_z(svbool_t pg, svuint32_t op)
+svuint32_t test_svrevh_u32_z(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_u32,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svuint32_t test_svrevh_u32_z(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevh_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svrevh_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_u64,_z,)(pg, op);
 }
@@ -94,7 +102,7 @@ svuint64_t test_svrevh_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevh_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
+svint32_t test_svrevh_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_s32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svint32_t test_svrevh_s32_m(svint32_t inactive, svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevh_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svrevh_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_s64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svint64_t test_svrevh_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevh_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
+svuint32_t test_svrevh_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_u32,_m,)(inactive, pg, op);
 }
@@ -145,7 +153,7 @@ svuint32_t test_svrevh_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevh_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svrevh_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_u64,_m,)(inactive, pg, op);
 }
@@ -162,7 +170,7 @@ svuint64_t test_svrevh_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svrevh_s32_x(svbool_t pg, svint32_t op)
+svint32_t test_svrevh_s32_x(svbool_t pg, svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_s32,_x,)(pg, op);
 }
@@ -179,7 +187,7 @@ svint32_t test_svrevh_s32_x(svbool_t pg, svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevh_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svrevh_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_s64,_x,)(pg, op);
 }
@@ -196,7 +204,7 @@ svint64_t test_svrevh_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svrevh_u32_x(svbool_t pg, svuint32_t op)
+svuint32_t test_svrevh_u32_x(svbool_t pg, svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_u32,_x,)(pg, op);
 }
@@ -213,7 +221,7 @@ svuint32_t test_svrevh_u32_x(svbool_t pg, svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revh.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevh_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svrevh_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevh,_u64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revw.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revw.c
index 8798f11adf27b..9ec50cb65ad32 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revw.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_revw.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevw_s64_z(svbool_t pg, svint64_t op)
+svint64_t test_svrevw_s64_z(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevw,_s64,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svint64_t test_svrevw_s64_z(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevw_u64_z(svbool_t pg, svuint64_t op)
+svuint64_t test_svrevw_u64_z(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevw,_u64,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svuint64_t test_svrevw_u64_z(svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
+svint64_t test_svrevw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevw,_s64,_m,)(inactive, pg, op);
 }
@@ -77,7 +85,7 @@ svint64_t test_svrevw_s64_m(svint64_t inactive, svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
+svuint64_t test_svrevw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevw,_u64,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svuint64_t test_svrevw_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svrevw_s64_x(svbool_t pg, svint64_t op)
+svint64_t test_svrevw_s64_x(svbool_t pg, svint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevw,_s64,_x,)(pg, op);
 }
@@ -111,7 +119,7 @@ svint64_t test_svrevw_s64_x(svbool_t pg, svint64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svrevw_u64_x(svbool_t pg, svuint64_t op)
+svuint64_t test_svrevw_u64_x(svbool_t pg, svuint64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrevw,_u64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinta.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinta.c
index 124cbce2dbc90..efba77f9becfb 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinta.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinta.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frinta.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrinta_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrinta_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svrinta_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrinta_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrinta_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svrinta_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frinta.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrinta_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrinta_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svrinta_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frinta.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrinta_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrinta_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svrinta_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrinta_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrinta_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svrinta_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frinta.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrinta_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrinta_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svrinta_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frinta.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrinta_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrinta_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svrinta_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrinta_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrinta_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svrinta_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frinta.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrinta_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrinta_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinta,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinti.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinti.c
index 4086d4f65072b..aa33c01b689fa 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinti.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rinti.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frinti.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrinti_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrinti_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svrinti_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrinti_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrinti_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svrinti_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frinti.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrinti_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrinti_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svrinti_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frinti.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrinti_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrinti_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svrinti_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrinti_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrinti_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svrinti_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frinti.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrinti_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrinti_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svrinti_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frinti.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrinti_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrinti_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svrinti_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrinti_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrinti_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svrinti_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frinti.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrinti_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrinti_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrinti,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintm.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintm.c
index 03f3a667812f0..d9f6451cd2e37 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintm.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintm.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintm.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintm_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintm_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svrintm_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintm_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintm_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svrintm_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintm.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintm_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintm_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svrintm_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintm.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintm_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintm_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svrintm_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintm_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintm_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svrintm_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintm.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintm_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintm_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svrintm_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintm.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintm_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintm_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svrintm_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintm_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintm_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svrintm_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintm.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintm_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintm_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintm,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintn.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintn.c
index 4f00f7d4a7f53..acc0c71d18e19 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintn.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintn.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintn.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintn_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintn_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svrintn_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintn_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintn_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svrintn_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintn.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintn_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintn_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svrintn_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintn.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintn_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintn_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svrintn_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintn_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintn_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svrintn_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintn.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintn_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintn_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svrintn_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintn.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintn_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintn_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svrintn_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintn_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintn_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svrintn_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintn.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintn_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintn_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintn,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintp.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintp.c
index fb5e9a5f55eee..2d3a888a8ba02 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintp.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintp.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintp.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintp_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintp_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svrintp_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintp_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintp_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svrintp_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintp.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintp_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintp_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svrintp_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintp.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintp_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintp_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svrintp_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintp_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintp_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svrintp_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintp.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintp_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintp_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svrintp_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintp.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintp_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintp_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svrintp_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintp_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintp_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svrintp_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintp.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintp_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintp_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintp,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintx.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintx.c
index fc257faaf83e8..849a75fff02fc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintx.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintx.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintx.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintx_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintx_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svrintx_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintx_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintx_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svrintx_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintx.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintx_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintx_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svrintx_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintx.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svrintx_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svrintx_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintx.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svrintx_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintx.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintx_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintx_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svrintx_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintx_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintx_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svrintx_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintx.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintx_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintx_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintx,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintz.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintz.c
index 2e405ae59996a..9efe452f78d20 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintz.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rintz.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintz.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintz_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintz_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svrintz_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintz_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintz_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svrintz_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintz.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintz_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintz_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svrintz_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintz.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintz_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintz_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svrintz_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintz_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintz_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svrintz_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintz.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintz_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintz_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svrintz_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frintz.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svrintz_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svrintz_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svrintz_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svrintz_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svrintz_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svrintz_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frintz.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svrintz_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svrintz_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrintz,_f64,_x,)(pg, op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrte.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrte.c
index b124a6d586f7d..2c276ec83a058 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrte.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrte.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frsqrte.x.nxv8f16(<vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svrsqrte_f16(svfloat16_t op)
+svfloat16_t test_svrsqrte_f16(svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrsqrte,_f16,,)(op);
 }
@@ -39,7 +47,7 @@ svfloat16_t test_svrsqrte_f16(svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frsqrte.x.nxv4f32(<vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svrsqrte_f32(svfloat32_t op)
+svfloat32_t test_svrsqrte_f32(svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrsqrte,_f32,,)(op);
 }
@@ -54,7 +62,7 @@ svfloat32_t test_svrsqrte_f32(svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frsqrte.x.nxv2f64(<vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svrsqrte_f64(svfloat64_t op)
+svfloat64_t test_svrsqrte_f64(svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrsqrte,_f64,,)(op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrts.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrts.c
index 4a32324f007cb..8dc65ff93a08b 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrts.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_rsqrts.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svrsqrts_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svrsqrts_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrsqrts,_f16,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svfloat16_t test_svrsqrts_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svrsqrts_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svrsqrts_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrsqrts,_f32,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svfloat32_t test_svrsqrts_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svrsqrts_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svrsqrts_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svrsqrts,_f64,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_scale.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_scale.c
index e4cc4cb287da1..460bd4b1c849d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_scale.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_scale.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svscale_f16_z(svbool_t pg, svfloat16_t op1, svint16_t op2)
+svfloat16_t test_svscale_f16_z(svbool_t pg, svfloat16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f16,_z,)(pg, op1, op2);
 }
@@ -47,7 +55,7 @@ svfloat16_t test_svscale_f16_z(svbool_t pg, svfloat16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svscale_f32_z(svbool_t pg, svfloat32_t op1, svint32_t op2)
+svfloat32_t test_svscale_f32_z(svbool_t pg, svfloat32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f32,_z,)(pg, op1, op2);
 }
@@ -66,7 +74,7 @@ svfloat32_t test_svscale_f32_z(svbool_t pg, svfloat32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svscale_f64_z(svbool_t pg, svfloat64_t op1, svint64_t op2)
+svfloat64_t test_svscale_f64_z(svbool_t pg, svfloat64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f64,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svfloat64_t test_svscale_f64_z(svbool_t pg, svfloat64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svscale_f16_m(svbool_t pg, svfloat16_t op1, svint16_t op2)
+svfloat16_t test_svscale_f16_m(svbool_t pg, svfloat16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f16,_m,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svfloat16_t test_svscale_f16_m(svbool_t pg, svfloat16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svscale_f32_m(svbool_t pg, svfloat32_t op1, svint32_t op2)
+svfloat32_t test_svscale_f32_m(svbool_t pg, svfloat32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f32,_m,)(pg, op1, op2);
 }
@@ -117,7 +125,7 @@ svfloat32_t test_svscale_f32_m(svbool_t pg, svfloat32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svscale_f64_m(svbool_t pg, svfloat64_t op1, svint64_t op2)
+svfloat64_t test_svscale_f64_m(svbool_t pg, svfloat64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f64,_m,)(pg, op1, op2);
 }
@@ -134,7 +142,7 @@ svfloat64_t test_svscale_f64_m(svbool_t pg, svfloat64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svscale_f16_x(svbool_t pg, svfloat16_t op1, svint16_t op2)
+svfloat16_t test_svscale_f16_x(svbool_t pg, svfloat16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f16,_x,)(pg, op1, op2);
 }
@@ -151,7 +159,7 @@ svfloat16_t test_svscale_f16_x(svbool_t pg, svfloat16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svscale_f32_x(svbool_t pg, svfloat32_t op1, svint32_t op2)
+svfloat32_t test_svscale_f32_x(svbool_t pg, svfloat32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f32,_x,)(pg, op1, op2);
 }
@@ -168,7 +176,7 @@ svfloat32_t test_svscale_f32_x(svbool_t pg, svfloat32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svscale_f64_x(svbool_t pg, svfloat64_t op1, svint64_t op2)
+svfloat64_t test_svscale_f64_x(svbool_t pg, svfloat64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_f64,_x,)(pg, op1, op2);
 }
@@ -191,7 +199,7 @@ svfloat64_t test_svscale_f64_x(svbool_t pg, svfloat64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svscale_n_f16_z(svbool_t pg, svfloat16_t op1, int16_t op2)
+svfloat16_t test_svscale_n_f16_z(svbool_t pg, svfloat16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f16,_z,)(pg, op1, op2);
 }
@@ -214,7 +222,7 @@ svfloat16_t test_svscale_n_f16_z(svbool_t pg, svfloat16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svscale_n_f32_z(svbool_t pg, svfloat32_t op1, int32_t op2)
+svfloat32_t test_svscale_n_f32_z(svbool_t pg, svfloat32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f32,_z,)(pg, op1, op2);
 }
@@ -237,7 +245,7 @@ svfloat32_t test_svscale_n_f32_z(svbool_t pg, svfloat32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svscale_n_f64_z(svbool_t pg, svfloat64_t op1, int64_t op2)
+svfloat64_t test_svscale_n_f64_z(svbool_t pg, svfloat64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f64,_z,)(pg, op1, op2);
 }
@@ -258,7 +266,7 @@ svfloat64_t test_svscale_n_f64_z(svbool_t pg, svfloat64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svscale_n_f16_m(svbool_t pg, svfloat16_t op1, int16_t op2)
+svfloat16_t test_svscale_n_f16_m(svbool_t pg, svfloat16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f16,_m,)(pg, op1, op2);
 }
@@ -279,7 +287,7 @@ svfloat16_t test_svscale_n_f16_m(svbool_t pg, svfloat16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svscale_n_f32_m(svbool_t pg, svfloat32_t op1, int32_t op2)
+svfloat32_t test_svscale_n_f32_m(svbool_t pg, svfloat32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f32,_m,)(pg, op1, op2);
 }
@@ -300,7 +308,7 @@ svfloat32_t test_svscale_n_f32_m(svbool_t pg, svfloat32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svscale_n_f64_m(svbool_t pg, svfloat64_t op1, int64_t op2)
+svfloat64_t test_svscale_n_f64_m(svbool_t pg, svfloat64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f64,_m,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svfloat64_t test_svscale_n_f64_m(svbool_t pg, svfloat64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svscale_n_f16_x(svbool_t pg, svfloat16_t op1, int16_t op2)
+svfloat16_t test_svscale_n_f16_x(svbool_t pg, svfloat16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f16,_x,)(pg, op1, op2);
 }
@@ -342,7 +350,7 @@ svfloat16_t test_svscale_n_f16_x(svbool_t pg, svfloat16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svscale_n_f32_x(svbool_t pg, svfloat32_t op1, int32_t op2)
+svfloat32_t test_svscale_n_f32_x(svbool_t pg, svfloat32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f32,_x,)(pg, op1, op2);
 }
@@ -363,7 +371,7 @@ svfloat32_t test_svscale_n_f32_x(svbool_t pg, svfloat32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svscale_n_f64_x(svbool_t pg, svfloat64_t op1, int64_t op2)
+svfloat64_t test_svscale_n_f64_x(svbool_t pg, svfloat64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svscale,_n_f64,_x,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel-bfloat.c
index 0665b44118301..82be1904f6770 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel-bfloat.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svsel_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svsel_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
  // expected-warning@+1 {{implicit declaration of function 'svsel_bf16'}}
   return SVE_ACLE_FUNC(svsel,_bf16,,)(pg, op1, op2);
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel.c
index af7f8da113996..9cf7f4d7f45cc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sel.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = select <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsel_s8(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svsel_s8(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_s8,,)(pg, op1, op2);
 }
@@ -41,7 +49,7 @@ svint8_t test_svsel_s8(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsel_s16(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svsel_s16(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_s16,,)(pg, op1, op2);
 }
@@ -58,7 +66,7 @@ svint16_t test_svsel_s16(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsel_s32(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svsel_s32(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_s32,,)(pg, op1, op2);
 }
@@ -75,7 +83,7 @@ svint32_t test_svsel_s32(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsel_s64(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svsel_s64(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_s64,,)(pg, op1, op2);
 }
@@ -90,7 +98,7 @@ svint64_t test_svsel_s64(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = select <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsel_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svsel_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_u8,,)(pg, op1, op2);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svsel_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsel_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svsel_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_u16,,)(pg, op1, op2);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svsel_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsel_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svsel_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_u32,,)(pg, op1, op2);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svsel_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsel_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svsel_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_u64,,)(pg, op1, op2);
 }
@@ -158,7 +166,7 @@ svuint64_t test_svsel_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsel_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svsel_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_f16,,)(pg, op1, op2);
 }
@@ -175,7 +183,7 @@ svfloat16_t test_svsel_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsel_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svsel_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_f32,,)(pg, op1, op2);
 }
@@ -192,7 +200,7 @@ svfloat32_t test_svsel_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsel_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svsel_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_f64,,)(pg, op1, op2);
 }
@@ -207,7 +215,7 @@ svfloat64_t test_svsel_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = select <vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]]
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svsel_b(svbool_t pg, svbool_t op1, svbool_t op2)
+svbool_t test_svsel_b(svbool_t pg, svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsel,_b,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
index d10dc84e9865e..7298666b3b1db 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c
index 4a4b8969a1110..71527c321f1e2 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set2.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
index 0ad7f09cc407b..72cd7e9dc001a 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c
index 0d0f611400d6d..17a808f72edac 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set3.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
index d4abce07b401d..49fcb15b9b3cd 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4-bfloat.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +bf16 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c
index 7e1aa74003d07..7eec14b84a9f9 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_set4.c
@@ -1,11 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice-bfloat.c
index e3959ac34f000..266e32fb59dd3 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice-bfloat.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.splice.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svsplice_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svsplice_bf16(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
  // expected-warning@+1 {{implicit declaration of function 'svsplice_bf16'}}
   return SVE_ACLE_FUNC(svsplice,_bf16,,)(pg, op1, op2);
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice.c
index 6070da575850c..fe4e910e37aae 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_splice.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsplice_s8(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svsplice_s8(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_s8,,)(pg, op1, op2);
 }
@@ -41,7 +49,7 @@ svint8_t test_svsplice_s8(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsplice_s16(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svsplice_s16(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_s16,,)(pg, op1, op2);
 }
@@ -58,7 +66,7 @@ svint16_t test_svsplice_s16(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsplice_s32(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svsplice_s32(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_s32,,)(pg, op1, op2);
 }
@@ -75,7 +83,7 @@ svint32_t test_svsplice_s32(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsplice_s64(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svsplice_s64(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_s64,,)(pg, op1, op2);
 }
@@ -90,7 +98,7 @@ svint64_t test_svsplice_s64(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.splice.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsplice_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svsplice_u8(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_u8,,)(pg, op1, op2);
 }
@@ -107,7 +115,7 @@ svuint8_t test_svsplice_u8(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.splice.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsplice_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svsplice_u16(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_u16,,)(pg, op1, op2);
 }
@@ -124,7 +132,7 @@ svuint16_t test_svsplice_u16(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.splice.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsplice_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svsplice_u32(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_u32,,)(pg, op1, op2);
 }
@@ -141,7 +149,7 @@ svuint32_t test_svsplice_u32(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.splice.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsplice_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svsplice_u64(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_u64,,)(pg, op1, op2);
 }
@@ -158,7 +166,7 @@ svuint64_t test_svsplice_u64(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.splice.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsplice_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svsplice_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_f16,,)(pg, op1, op2);
 }
@@ -175,7 +183,7 @@ svfloat16_t test_svsplice_f16(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.splice.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsplice_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svsplice_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_f32,,)(pg, op1, op2);
 }
@@ -192,7 +200,7 @@ svfloat32_t test_svsplice_f32(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.splice.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsplice_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svsplice_f64(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsplice,_f64,,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sqrt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sqrt.c
index 2c51c473bdc2a..8c3ac5dbfb182 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sqrt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sqrt.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsqrt_f16_z(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svsqrt_f16_z(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f16,_z,)(pg, op);
 }
@@ -43,7 +51,7 @@ svfloat16_t test_svsqrt_f16_z(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsqrt_f32_z(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svsqrt_f32_z(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f32,_z,)(pg, op);
 }
@@ -60,7 +68,7 @@ svfloat32_t test_svsqrt_f32_z(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsqrt_f64_z(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svsqrt_f64_z(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f64,_z,)(pg, op);
 }
@@ -77,7 +85,7 @@ svfloat64_t test_svsqrt_f64_z(svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half> [[INACTIVE:%.*]], <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsqrt_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
+svfloat16_t test_svsqrt_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f16,_m,)(inactive, pg, op);
 }
@@ -94,7 +102,7 @@ svfloat16_t test_svsqrt_f16_m(svfloat16_t inactive, svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> [[INACTIVE:%.*]], <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsqrt_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
+svfloat32_t test_svsqrt_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f32,_m,)(inactive, pg, op);
 }
@@ -111,7 +119,7 @@ svfloat32_t test_svsqrt_f32_m(svfloat32_t inactive, svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double> [[INACTIVE:%.*]], <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsqrt_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
+svfloat64_t test_svsqrt_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f64,_m,)(inactive, pg, op);
 }
@@ -128,7 +136,7 @@ svfloat64_t test_svsqrt_f64_m(svfloat64_t inactive, svbool_t pg, svfloat64_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsqrt.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsqrt_f16_x(svbool_t pg, svfloat16_t op)
+svfloat16_t test_svsqrt_f16_x(svbool_t pg, svfloat16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f16,_x,)(pg, op);
 }
@@ -145,7 +153,7 @@ svfloat16_t test_svsqrt_f16_x(svbool_t pg, svfloat16_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsqrt_f32_x(svbool_t pg, svfloat32_t op)
+svfloat32_t test_svsqrt_f32_x(svbool_t pg, svfloat32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f32,_x,)(pg, op);
 }
@@ -162,7 +170,7 @@ svfloat32_t test_svsqrt_f32_x(svbool_t pg, svfloat32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsqrt.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsqrt_f64_x(svbool_t pg, svfloat64_t op)
+svfloat64_t test_svsqrt_f64_x(svbool_t pg, svfloat64_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsqrt,_f64,_x,)(pg, op);
 }
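
The hunks above show the pattern used throughout these tests: under the
new +sme RUN line the preprocessor defines __ARM_FEATURE_SME, MODE_ATTR
expands to __arm_streaming, and each test function is compiled as a
streaming function, so the streaming-compatible intrinsics can be
checked without +sve. A minimal sketch of what a test reduces to after
preprocessing (the function names here are invented for illustration
and are not part of the patch):

  #include <arm_sve.h>

  /* Built with -target-feature +sme: __ARM_FEATURE_SME is defined, the
     function becomes a streaming function, and the streaming-compatible
     intrinsic is accepted without +sve. */
  svfloat32_t sqrt_streaming(svbool_t pg, svfloat32_t op) __arm_streaming {
    return svsqrt_f32_z(pg, op);
  }

  /* Built with -target-feature +sve: MODE_ATTR expands to nothing and
     the test behaves exactly as before. */
  svfloat32_t sqrt_normal(svbool_t pg, svfloat32_t op) {
    return svsqrt_f32_z(pg, op);
  }
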
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c
index 7075e96d62c5b..c1254e03102d7 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0(<vscale x 8 x bfloat> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
+void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_bf16,,)(pg, base, data);
 }
@@ -52,7 +60,7 @@ void test_svst1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8bf16.p0(<vscale x 8 x bfloat> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data)
+void test_svst1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_bf16,,)(pg, base, vnum, data);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
index 987ec6c3e8789..519f0c90614a5 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data)
+void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_s8,,)(pg, base, data);
 }
@@ -41,7 +49,7 @@ void test_svst1_s8(svbool_t pg, int8_t *base, svint8_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data)
+void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_s16,,)(pg, base, data);
 }
@@ -58,7 +66,7 @@ void test_svst1_s16(svbool_t pg, int16_t *base, svint16_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data)
+void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_s32,,)(pg, base, data);
 }
@@ -75,7 +83,7 @@ void test_svst1_s32(svbool_t pg, int32_t *base, svint32_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data)
+void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_s64,,)(pg, base, data);
 }
@@ -90,7 +98,7 @@ void test_svst1_s64(svbool_t pg, int64_t *base, svint64_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
+void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_u8,,)(pg, base, data);
 }
@@ -107,7 +115,7 @@ void test_svst1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
+void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_u16,,)(pg, base, data);
 }
@@ -124,7 +132,7 @@ void test_svst1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
+void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_u32,,)(pg, base, data);
 }
@@ -141,7 +149,7 @@ void test_svst1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
+void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_u64,,)(pg, base, data);
 }
@@ -158,7 +166,7 @@ void test_svst1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0(<vscale x 8 x half> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
+void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_f16,,)(pg, base, data);
 }
@@ -175,7 +183,7 @@ void test_svst1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
+void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_f32,,)(pg, base, data);
 }
@@ -192,7 +200,7 @@ void test_svst1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[DATA:%.*]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
+void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1,_f64,,)(pg, base, data);
 }
@@ -215,7 +223,7 @@ void test_svst1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
+void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_s8,,)(pg, base, vnum, data);
 }
@@ -240,7 +248,7 @@ void test_svst1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data)
+void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_s16,,)(pg, base, vnum, data);
 }
@@ -265,7 +273,7 @@ void test_svst1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t dat
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data)
+void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_s32,,)(pg, base, vnum, data);
 }
@@ -290,7 +298,7 @@ void test_svst1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t dat
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data)
+void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_s64,,)(pg, base, vnum, data);
 }
@@ -313,7 +321,7 @@ void test_svst1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t dat
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[DATA:%.*]], ptr [[TMP2]], i32 1, <vscale x 16 x i1> [[PG:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data)
+void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_u8,,)(pg, base, vnum, data);
 }
@@ -338,7 +346,7 @@ void test_svst1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data)
+void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_u16,,)(pg, base, vnum, data);
 }
@@ -363,7 +371,7 @@ void test_svst1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data)
+void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_u32,,)(pg, base, vnum, data);
 }
@@ -388,7 +396,7 @@ void test_svst1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data)
+void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_u64,,)(pg, base, vnum, data);
 }
@@ -413,7 +421,7 @@ void test_svst1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv8f16.p0(<vscale x 8 x half> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data)
+void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_f16,,)(pg, base, vnum, data);
 }
@@ -438,7 +446,7 @@ void test_svst1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data)
+void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_f32,,)(pg, base, vnum, data);
 }
@@ -463,11 +471,13 @@ void test_svst1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t
 // CPP-CHECK-NEXT:    tail call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[DATA:%.*]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data)
+void test_svst1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1_vnum,_f64,,)(pg, base, vnum, data);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svst1_scatter_u32base_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]])
@@ -1193,3 +1203,5 @@ void test_svst1_scatter_u64base_index_f64(svbool_t pg, svuint64_t bases, int64_t
 {
   return SVE_ACLE_FUNC(svst1_scatter,_u64base,_index,_f64)(pg, bases, index, data);
 }
+
+#endif
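
Note the new #ifndef __ARM_FEATURE_SME guard around the scatter-store
tests: the gather/scatter forms are not streaming-compatible (they stay
behind the plain "sve" target guard), so they are compiled out of the
+sme run. The same guard recurs in the st1b/st1h/st1w tests below. As
an illustrative sketch (not part of the patch), user code that must
build in both modes can apply the same guard:

  #include <arm_sve.h>

  /* Hypothetical helper: the scatter store is only emitted when the
     translation unit is not targeting streaming SME code. */
  void store_via_bases(svbool_t pg, svuint32_t bases, svint32_t data) {
  #ifndef __ARM_FEATURE_SME
    svst1_scatter_u32base_s32(pg, bases, data);
  #endif
  }
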
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
index 10187fba3aa6e..152f01aab7405 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1b.c
@@ -2,8 +2,17 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -18,7 +27,7 @@
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data)
+void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b,_s16,,)(pg, base, data);
 }
@@ -30,7 +39,7 @@ void test_svst1b_s16(svbool_t pg, int8_t *base, svint16_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data)
+void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b,_s32,,)(pg, base, data);
 }
@@ -42,7 +51,7 @@ void test_svst1b_s32(svbool_t pg, int8_t *base, svint32_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data)
+void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b,_s64,,)(pg, base, data);
 }
@@ -54,7 +63,7 @@ void test_svst1b_s64(svbool_t pg, int8_t *base, svint64_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data)
+void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b,_u16,,)(pg, base, data);
 }
@@ -66,7 +75,7 @@ void test_svst1b_u16(svbool_t pg, uint8_t *base, svuint16_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data)
+void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b,_u32,,)(pg, base, data);
 }
@@ -78,7 +87,7 @@ void test_svst1b_u32(svbool_t pg, uint8_t *base, svuint32_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data)
+void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b,_u64,,)(pg, base, data);
 }
@@ -94,7 +103,7 @@ void test_svst1b_u64(svbool_t pg, uint8_t *base, svuint64_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t data)
+void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b_vnum,_s16,,)(pg, base, vnum, data);
 }
@@ -110,7 +119,7 @@ void test_svst1b_vnum_s16(svbool_t pg, int8_t *base, int64_t vnum, svint16_t dat
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t data)
+void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b_vnum,_s32,,)(pg, base, vnum, data);
 }
@@ -126,7 +135,7 @@ void test_svst1b_vnum_s32(svbool_t pg, int8_t *base, int64_t vnum, svint32_t dat
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t data)
+void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b_vnum,_s64,,)(pg, base, vnum, data);
 }
@@ -142,7 +151,7 @@ void test_svst1b_vnum_s64(svbool_t pg, int8_t *base, int64_t vnum, svint64_t dat
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv8i8.p0(<vscale x 8 x i8> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 8 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t data)
+void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b_vnum,_u16,,)(pg, base, vnum, data);
 }
@@ -158,7 +167,7 @@ void test_svst1b_vnum_u16(svbool_t pg, uint8_t *base, int64_t vnum, svuint16_t d
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t data)
+void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b_vnum,_u32,,)(pg, base, vnum, data);
 }
@@ -174,11 +183,13 @@ void test_svst1b_vnum_u32(svbool_t pg, uint8_t *base, int64_t vnum, svuint32_t d
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1b_vnum_u64(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t data)
+void test_svst1b_vnum_u64(svbool_t pg, uint8_t *base, int64_t vnum, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1b_vnum,_u64,,)(pg, base, vnum, data);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svst1b_scatter_u32base_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i8>
@@ -370,3 +381,5 @@ void test_svst1b_scatter_u64base_offset_u64(svbool_t pg, svuint64_t bases, int64
 {
   return SVE_ACLE_FUNC(svst1b_scatter,_u64base,_offset,_u64)(pg, bases, offset, data);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
index 598b42a5e9c26..9aa450f2e5457 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1h.c
@@ -2,8 +2,17 @@
 // REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -18,7 +27,7 @@
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0(<vscale x 4 x i16> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data)
+void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1h,_s32,,)(pg, base, data);
 }
@@ -30,7 +39,7 @@ void test_svst1h_s32(svbool_t pg, int16_t *base, svint32_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data)
+void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1h,_s64,,)(pg, base, data);
 }
@@ -42,7 +51,7 @@ void test_svst1h_s64(svbool_t pg, int16_t *base, svint64_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0(<vscale x 4 x i16> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data)
+void test_svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1h,_u32,,)(pg, base, data);
 }
@@ -54,7 +63,7 @@ void test_svst1h_u32(svbool_t pg, uint16_t *base, svuint32_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data)
+void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1h,_u64,,)(pg, base, data);
 }
@@ -70,7 +79,7 @@ void test_svst1h_u64(svbool_t pg, uint16_t *base, svuint64_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0(<vscale x 4 x i16> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t data)
+void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1h_vnum,_s32,,)(pg, base, vnum, data);
 }
@@ -86,7 +95,7 @@ void test_svst1h_vnum_s32(svbool_t pg, int16_t *base, int64_t vnum, svint32_t da
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t data)
+void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1h_vnum,_s64,,)(pg, base, vnum, data);
 }
@@ -102,7 +111,7 @@ void test_svst1h_vnum_s64(svbool_t pg, int16_t *base, int64_t vnum, svint64_t da
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv4i16.p0(<vscale x 4 x i16> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 4 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t data)
+void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1h_vnum,_u32,,)(pg, base, vnum, data);
 }
@@ -118,11 +127,13 @@ void test_svst1h_vnum_u32(svbool_t pg, uint16_t *base, int64_t vnum, svuint32_t
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i16.p0(<vscale x 2 x i16> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1h_vnum_u64(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t data)
+void test_svst1h_vnum_u64(svbool_t pg, uint16_t *base, int64_t vnum, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1h_vnum,_u64,,)(pg, base, vnum, data);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svst1h_scatter_u32base_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 4 x i32> [[DATA:%.*]] to <vscale x 4 x i16>
@@ -462,3 +473,5 @@ void test_svst1h_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_
 {
   return SVE_ACLE_FUNC(svst1h_scatter,_u64base,_index,_u64)(pg, bases, index, data);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c
index e224d944f7b9b..f22190b3583ed 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st1w.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -o - -emit-llvm %s | opt -S -passes=mem2reg,instcombine,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -19,7 +27,7 @@
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data)
+void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1w,_s64,,)(pg, base, data);
 }
@@ -31,7 +39,7 @@ void test_svst1w_s64(svbool_t pg, int32_t *base, svint64_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP1]], ptr [[BASE:%.*]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data)
+void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1w,_u64,,)(pg, base, data);
 }
@@ -47,7 +55,7 @@ void test_svst1w_u64(svbool_t pg, uint32_t *base, svuint64_t data)
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t data)
+void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1w_vnum,_s64,,)(pg, base, vnum, data);
 }
@@ -63,11 +71,13 @@ void test_svst1w_vnum_s64(svbool_t pg, int32_t *base, int64_t vnum, svint64_t da
 // CHECK-NEXT:    tail call void @llvm.masked.store.nxv2i32.p0(<vscale x 2 x i32> [[TMP4]], ptr [[TMP3]], i32 1, <vscale x 2 x i1> [[TMP0]])
 // CHECK-NEXT:    ret void
 //
-void test_svst1w_vnum_u64(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t data)
+void test_svst1w_vnum_u64(svbool_t pg, uint32_t *base, int64_t vnum, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst1w_vnum,_u64,,)(pg, base, vnum, data);
 }
 
+#ifndef __ARM_FEATURE_SME
+
 // CHECK-LABEL: @test_svst1w_scatter_u64base_s64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = trunc <vscale x 2 x i64> [[DATA:%.*]] to <vscale x 2 x i32>
@@ -237,3 +247,5 @@ void test_svst1w_scatter_u64base_index_u64(svbool_t pg, svuint64_t bases, int64_
 {
   return SVE_ACLE_FUNC(svst1w_scatter,_u64base,_index,_u64)(pg, bases, index, data);
 }
+
+#endif
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c
index dae405bc2f29a..e2c4883f7a1c8 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -30,7 +38,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data)
+void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_bf16,,)(pg, base, data);
 }
@@ -53,7 +61,7 @@ void test_svst2_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data)
+void test_svst2_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_bf16,,)(pg, base, vnum, data);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c
index b6cb6dbdc0c48..9d87943dc3591 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st2.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -28,7 +36,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data)
+void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_s8,,)(pg, base, data);
 }
@@ -49,7 +57,7 @@ void test_svst2_s8(svbool_t pg, int8_t *base, svint8x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data)
+void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_s16,,)(pg, base, data);
 }
@@ -70,7 +78,7 @@ void test_svst2_s16(svbool_t pg, int16_t *base, svint16x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data)
+void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_s32,,)(pg, base, data);
 }
@@ -91,7 +99,7 @@ void test_svst2_s32(svbool_t pg, int32_t *base, svint32x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data)
+void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_s64,,)(pg, base, data);
 }
@@ -110,7 +118,7 @@ void test_svst2_s64(svbool_t pg, int64_t *base, svint64x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data)
+void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_u8,,)(pg, base, data);
 }
@@ -131,7 +139,7 @@ void test_svst2_u8(svbool_t pg, uint8_t *base, svuint8x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data)
+void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_u16,,)(pg, base, data);
 }
@@ -152,7 +160,7 @@ void test_svst2_u16(svbool_t pg, uint16_t *base, svuint16x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data)
+void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_u32,,)(pg, base, data);
 }
@@ -173,7 +181,7 @@ void test_svst2_u32(svbool_t pg, uint32_t *base, svuint32x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data)
+void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_u64,,)(pg, base, data);
 }
@@ -194,7 +202,7 @@ void test_svst2_u64(svbool_t pg, uint64_t *base, svuint64x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data)
+void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_f16,,)(pg, base, data);
 }
@@ -215,7 +223,7 @@ void test_svst2_f16(svbool_t pg, float16_t *base, svfloat16x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data)
+void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_f32,,)(pg, base, data);
 }
@@ -236,7 +244,7 @@ void test_svst2_f32(svbool_t pg, float32_t *base, svfloat32x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x i1> [[TMP2]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data)
+void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2,_f64,,)(pg, base, data);
 }
@@ -257,7 +265,7 @@ void test_svst2_f64(svbool_t pg, float64_t *base, svfloat64x2_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data)
+void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_s8,,)(pg, base, vnum, data);
 }
@@ -280,7 +288,7 @@ void test_svst2_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x2_t data
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t data)
+void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_s16,,)(pg, base, vnum, data);
 }
@@ -303,7 +311,7 @@ void test_svst2_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x2_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t data)
+void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_s32,,)(pg, base, vnum, data);
 }
@@ -326,7 +334,7 @@ void test_svst2_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x2_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t data)
+void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_s64,,)(pg, base, vnum, data);
 }
@@ -347,7 +355,7 @@ void test_svst2_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x2_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP2]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t data)
+void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_u8,,)(pg, base, vnum, data);
 }
@@ -370,7 +378,7 @@ void test_svst2_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x2_t da
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t data)
+void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_u16,,)(pg, base, vnum, data);
 }
@@ -393,7 +401,7 @@ void test_svst2_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x2_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t data)
+void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_u32,,)(pg, base, vnum, data);
 }
@@ -416,7 +424,7 @@ void test_svst2_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x2_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t data)
+void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_u64,,)(pg, base, vnum, data);
 }
@@ -439,7 +447,7 @@ void test_svst2_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x2_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2_t data)
+void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_f16,,)(pg, base, vnum, data);
 }
@@ -462,7 +470,7 @@ void test_svst2_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x2
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2_t data)
+void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_f32,,)(pg, base, vnum, data);
 }
@@ -485,7 +493,7 @@ void test_svst2_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x2
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st2.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x i1> [[TMP2]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2_t data)
+void test_svst2_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x2_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst2_vnum,_f64,,)(pg, base, vnum, data);
 }
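
In contrast to the scatter forms, the contiguous structure stores
(st2/st3) receive MODE_ATTR with no __ARM_FEATURE_SME guard, since
contiguous loads and stores remain valid in streaming mode. A minimal
sketch of the resulting usage (the function name is invented for
illustration):

  #include <arm_sve.h>

  /* A streaming function may still use contiguous multi-vector stores;
     no feature guard is needed, unlike the scatter intrinsics above. */
  void interleave_store(svbool_t pg, float32_t *base, svfloat32x2_t data)
      __arm_streaming {
    svst2_f32(pg, base, data);
  }
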
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c
index fb56b830e6152..9b7db79896e48 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -33,7 +41,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data)
+void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_bf16,,)(pg, base, data);
 }
@@ -58,7 +66,7 @@ void test_svst3_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x3_t data)
+void test_svst3_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_bf16,,)(pg, base, vnum, data);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c
index fef8c86822e4d..23da13f0e057e 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st3.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -30,7 +38,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data)
+void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_s8,,)(pg, base, data);
 }
@@ -53,7 +61,7 @@ void test_svst3_s8(svbool_t pg, int8_t *base, svint8x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data)
+void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_s16,,)(pg, base, data);
 }
@@ -76,7 +84,7 @@ void test_svst3_s16(svbool_t pg, int16_t *base, svint16x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data)
+void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_s32,,)(pg, base, data);
 }
@@ -99,7 +107,7 @@ void test_svst3_s32(svbool_t pg, int32_t *base, svint32x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data)
+void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_s64,,)(pg, base, data);
 }
@@ -120,7 +128,7 @@ void test_svst3_s64(svbool_t pg, int64_t *base, svint64x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data)
+void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_u8,,)(pg, base, data);
 }
@@ -143,7 +151,7 @@ void test_svst3_u8(svbool_t pg, uint8_t *base, svuint8x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data)
+void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_u16,,)(pg, base, data);
 }
@@ -166,7 +174,7 @@ void test_svst3_u16(svbool_t pg, uint16_t *base, svuint16x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data)
+void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_u32,,)(pg, base, data);
 }
@@ -189,7 +197,7 @@ void test_svst3_u32(svbool_t pg, uint32_t *base, svuint32x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data)
+void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_u64,,)(pg, base, data);
 }
@@ -212,7 +220,7 @@ void test_svst3_u64(svbool_t pg, uint64_t *base, svuint64x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data)
+void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_f16,,)(pg, base, data);
 }
@@ -235,7 +243,7 @@ void test_svst3_f16(svbool_t pg, float16_t *base, svfloat16x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data)
+void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_f32,,)(pg, base, data);
 }
@@ -258,7 +266,7 @@ void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x i1> [[TMP3]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data)
+void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3,_f64,,)(pg, base, data);
 }
@@ -281,7 +289,7 @@ void test_svst3_f64(svbool_t pg, float64_t *base, svfloat64x3_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data)
+void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_s8,,)(pg, base, vnum, data);
 }
@@ -306,7 +314,7 @@ void test_svst3_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x3_t data
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t data)
+void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_s16,,)(pg, base, vnum, data);
 }
@@ -331,7 +339,7 @@ void test_svst3_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x3_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t data)
+void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_s32,,)(pg, base, vnum, data);
 }
@@ -356,7 +364,7 @@ void test_svst3_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x3_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t data)
+void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_s64,,)(pg, base, vnum, data);
 }
@@ -379,7 +387,7 @@ void test_svst3_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x3_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP3]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t data)
+void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_u8,,)(pg, base, vnum, data);
 }
@@ -404,7 +412,7 @@ void test_svst3_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x3_t da
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t data)
+void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_u16,,)(pg, base, vnum, data);
 }
@@ -429,7 +437,7 @@ void test_svst3_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x3_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t data)
+void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_u32,,)(pg, base, vnum, data);
 }
@@ -454,7 +462,7 @@ void test_svst3_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x3_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t data)
+void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_u64,,)(pg, base, vnum, data);
 }
@@ -479,7 +487,7 @@ void test_svst3_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x3_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3_t data)
+void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_f16,,)(pg, base, vnum, data);
 }
@@ -504,7 +512,7 @@ void test_svst3_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x3
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3_t data)
+void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_f32,,)(pg, base, vnum, data);
 }
@@ -529,7 +537,7 @@ void test_svst3_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x3
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st3.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x i1> [[TMP3]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3_t data)
+void test_svst3_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x3_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst3_vnum,_f64,,)(pg, base, vnum, data);
 }
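
The pattern above repeats for every test file in this patch: a `+sme`-only RUN line is added, a MODE_ATTR macro is defined, and each test function gains MODE_ATTR so it becomes a streaming function whenever __ARM_FEATURE_SME is defined. As a rough sketch (illustrative only, not part of the patch), after preprocessing with -target-feature +sme, and assuming the usual non-overloaded SVE_ACLE_FUNC definition that pastes all four tokens together, a test such as test_svst3_f32 reads:

  // Sketch, assuming __ARM_FEATURE_SME is defined, so MODE_ATTR expands to
  // __arm_streaming, and SVE_ACLE_FUNC(svst3,_f32,,) pastes to svst3_f32.
  #include <arm_sve.h>

  void test_svst3_f32(svbool_t pg, float32_t *base, svfloat32x3_t data) __arm_streaming
  {
    svst3_f32(pg, base, data);
  }
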
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c
index d112624d61b34..dd032ac0a2bf2 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -35,7 +43,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data)
+void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_bf16,,)(pg, base, data);
 }
@@ -62,7 +70,7 @@ void test_svst4_bf16(svbool_t pg, bfloat16_t *base, svbfloat16x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8bf16(<vscale x 8 x bfloat> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[TMP2]], <vscale x 8 x bfloat> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x4_t data)
+void test_svst4_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_bf16,,)(pg, base, vnum, data);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c
index 2d25599fe1758..2ed52dff68729 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_st4.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -32,7 +40,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data)
+void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_s8,,)(pg, base, data);
 }
@@ -57,7 +65,7 @@ void test_svst4_s8(svbool_t pg, int8_t *base, svint8x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data)
+void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_s16,,)(pg, base, data);
 }
@@ -82,7 +90,7 @@ void test_svst4_s16(svbool_t pg, int16_t *base, svint16x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data)
+void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_s32,,)(pg, base, data);
 }
@@ -107,7 +115,7 @@ void test_svst4_s32(svbool_t pg, int32_t *base, svint32x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data)
+void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_s64,,)(pg, base, data);
 }
@@ -130,7 +138,7 @@ void test_svst4_s64(svbool_t pg, int64_t *base, svint64x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data)
+void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_u8,,)(pg, base, data);
 }
@@ -155,7 +163,7 @@ void test_svst4_u8(svbool_t pg, uint8_t *base, svuint8x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data)
+void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_u16,,)(pg, base, data);
 }
@@ -180,7 +188,7 @@ void test_svst4_u16(svbool_t pg, uint16_t *base, svuint16x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data)
+void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_u32,,)(pg, base, data);
 }
@@ -205,7 +213,7 @@ void test_svst4_u32(svbool_t pg, uint32_t *base, svuint32x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data)
+void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_u64,,)(pg, base, data);
 }
@@ -230,7 +238,7 @@ void test_svst4_u64(svbool_t pg, uint64_t *base, svuint64x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data)
+void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_f16,,)(pg, base, data);
 }
@@ -255,7 +263,7 @@ void test_svst4_f16(svbool_t pg, float16_t *base, svfloat16x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data)
+void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_f32,,)(pg, base, data);
 }
@@ -280,7 +288,7 @@ void test_svst4_f32(svbool_t pg, float32_t *base, svfloat32x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP4]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data)
+void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4,_f64,,)(pg, base, data);
 }
@@ -305,7 +313,7 @@ void test_svst4_f64(svbool_t pg, float64_t *base, svfloat64x4_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data)
+void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_s8,,)(pg, base, vnum, data);
 }
@@ -332,7 +340,7 @@ void test_svst4_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8x4_t data
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data)
+void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_s16,,)(pg, base, vnum, data);
 }
@@ -359,7 +367,7 @@ void test_svst4_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16x4_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data)
+void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_s32,,)(pg, base, vnum, data);
 }
@@ -386,7 +394,7 @@ void test_svst4_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32x4_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data)
+void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_s64,,)(pg, base, vnum, data);
 }
@@ -411,7 +419,7 @@ void test_svst4_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64x4_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP4]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data)
+void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_u8,,)(pg, base, vnum, data);
 }
@@ -438,7 +446,7 @@ void test_svst4_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8x4_t da
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8i16(<vscale x 8 x i16> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[TMP2]], <vscale x 8 x i16> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data)
+void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_u16,,)(pg, base, vnum, data);
 }
@@ -465,7 +473,7 @@ void test_svst4_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16x4_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]], <vscale x 4 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data)
+void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_u32,,)(pg, base, vnum, data);
 }
@@ -492,7 +500,7 @@ void test_svst4_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32x4_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2i64(<vscale x 2 x i64> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[TMP2]], <vscale x 2 x i64> [[TMP3]], <vscale x 2 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data)
+void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_u64,,)(pg, base, vnum, data);
 }
@@ -519,7 +527,7 @@ void test_svst4_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64x4_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv8f16(<vscale x 8 x half> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[TMP2]], <vscale x 8 x half> [[TMP3]], <vscale x 8 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data)
+void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_f16,,)(pg, base, vnum, data);
 }
@@ -546,7 +554,7 @@ void test_svst4_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16x4
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]], <vscale x 4 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data)
+void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_f32,,)(pg, base, vnum, data);
 }
@@ -573,7 +581,7 @@ void test_svst4_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32x4
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.st4.nxv2f64(<vscale x 2 x double> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[TMP2]], <vscale x 2 x double> [[TMP3]], <vscale x 2 x i1> [[TMP4]], ptr [[TMP5]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data)
+void test_svst4_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64x4_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svst4_vnum,_f64,,)(pg, base, vnum, data);
 }
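
Note that the new RUN lines compile to assembly (-S -o /dev/null) rather than piping through FileCheck, so the +sme invocation is purely a build test: it verifies that each store intrinsic is accepted without +sve once the callers are streaming. A minimal sketch of the mode check this exercises (hypothetical example, not part of the patch):

  // Sketch, compiled with only -target-feature +sme: the same builtin is
  // accepted in a streaming function but diagnosed in a non-streaming one,
  // where the runtime mode cannot be verified.
  #include <arm_sve.h>

  void ok(svbool_t pg, int8_t *base, svint8x3_t data) __arm_streaming {
    svst3_s8(pg, base, data);   // fine: streaming mode matches +sme
  }

  void bad(svbool_t pg, int8_t *base, svint8x3_t data) {
    svst3_s8(pg, base, data);   // error: needs +sve or a streaming caller
  }
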
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c
index 03c0b6cb1f613..c13f7d82caac0 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1-bfloat.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -27,7 +35,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
+void test_svstnt1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_bf16,,)(pg, base, data);
 }
@@ -46,7 +54,7 @@ void test_svstnt1_bf16(svbool_t pg, bfloat16_t *base, svbfloat16_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data)
+void test_svstnt1_vnum_bf16(svbool_t pg, bfloat16_t *base, int64_t vnum, svbfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_bf16,,)(pg, base, vnum, data);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c
index 9749737b0e3f2..5e0869557c8d7 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_stnt1.c
@@ -5,9 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_s8(svbool_t pg, int8_t *base, svint8_t data)
+void test_svstnt1_s8(svbool_t pg, int8_t *base, svint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_s8,,)(pg, base, data);
 }
@@ -42,7 +50,7 @@ void test_svstnt1_s8(svbool_t pg, int8_t *base, svint8_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_s16(svbool_t pg, int16_t *base, svint16_t data)
+void test_svstnt1_s16(svbool_t pg, int16_t *base, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_s16,,)(pg, base, data);
 }
@@ -59,7 +67,7 @@ void test_svstnt1_s16(svbool_t pg, int16_t *base, svint16_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_s32(svbool_t pg, int32_t *base, svint32_t data)
+void test_svstnt1_s32(svbool_t pg, int32_t *base, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_s32,,)(pg, base, data);
 }
@@ -76,7 +84,7 @@ void test_svstnt1_s32(svbool_t pg, int32_t *base, svint32_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_s64(svbool_t pg, int64_t *base, svint64_t data)
+void test_svstnt1_s64(svbool_t pg, int64_t *base, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_s64,,)(pg, base, data);
 }
@@ -91,7 +99,7 @@ void test_svstnt1_s64(svbool_t pg, int64_t *base, svint64_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
+void test_svstnt1_u8(svbool_t pg, uint8_t *base, svuint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_u8,,)(pg, base, data);
 }
@@ -108,7 +116,7 @@ void test_svstnt1_u8(svbool_t pg, uint8_t *base, svuint8_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
+void test_svstnt1_u16(svbool_t pg, uint16_t *base, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_u16,,)(pg, base, data);
 }
@@ -125,7 +133,7 @@ void test_svstnt1_u16(svbool_t pg, uint16_t *base, svuint16_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
+void test_svstnt1_u32(svbool_t pg, uint32_t *base, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_u32,,)(pg, base, data);
 }
@@ -142,7 +150,7 @@ void test_svstnt1_u32(svbool_t pg, uint32_t *base, svuint32_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
+void test_svstnt1_u64(svbool_t pg, uint64_t *base, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_u64,,)(pg, base, data);
 }
@@ -159,7 +167,7 @@ void test_svstnt1_u64(svbool_t pg, uint64_t *base, svuint64_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
+void test_svstnt1_f16(svbool_t pg, float16_t *base, svfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_f16,,)(pg, base, data);
 }
@@ -176,7 +184,7 @@ void test_svstnt1_f16(svbool_t pg, float16_t *base, svfloat16_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
+void test_svstnt1_f32(svbool_t pg, float32_t *base, svfloat32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_f32,,)(pg, base, data);
 }
@@ -193,7 +201,7 @@ void test_svstnt1_f32(svbool_t pg, float32_t *base, svfloat32_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[BASE:%.*]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
+void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1,_f64,,)(pg, base, data);
 }
@@ -210,7 +218,7 @@ void test_svstnt1_f64(svbool_t pg, float64_t *base, svfloat64_t data)
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data)
+void test_svstnt1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_s8,,)(pg, base, vnum, data);
 }
@@ -229,7 +237,7 @@ void test_svstnt1_vnum_s8(svbool_t pg, int8_t *base, int64_t vnum, svint8_t data
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data)
+void test_svstnt1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_s16,,)(pg, base, vnum, data);
 }
@@ -248,7 +256,7 @@ void test_svstnt1_vnum_s16(svbool_t pg, int16_t *base, int64_t vnum, svint16_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data)
+void test_svstnt1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_s32,,)(pg, base, vnum, data);
 }
@@ -267,7 +275,7 @@ void test_svstnt1_vnum_s32(svbool_t pg, int32_t *base, int64_t vnum, svint32_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data)
+void test_svstnt1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_s64,,)(pg, base, vnum, data);
 }
@@ -284,7 +292,7 @@ void test_svstnt1_vnum_s64(svbool_t pg, int64_t *base, int64_t vnum, svint64_t d
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i1> [[PG:%.*]], ptr [[TMP0]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data)
+void test_svstnt1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_u8,,)(pg, base, vnum, data);
 }
@@ -303,7 +311,7 @@ void test_svstnt1_vnum_u8(svbool_t pg, uint8_t *base, int64_t vnum, svuint8_t da
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data)
+void test_svstnt1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_u16,,)(pg, base, vnum, data);
 }
@@ -322,7 +330,7 @@ void test_svstnt1_vnum_u16(svbool_t pg, uint16_t *base, int64_t vnum, svuint16_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data)
+void test_svstnt1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_u32,,)(pg, base, vnum, data);
 }
@@ -341,7 +349,7 @@ void test_svstnt1_vnum_u32(svbool_t pg, uint32_t *base, int64_t vnum, svuint32_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data)
+void test_svstnt1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_u64,,)(pg, base, vnum, data);
 }
@@ -360,7 +368,7 @@ void test_svstnt1_vnum_u64(svbool_t pg, uint64_t *base, int64_t vnum, svuint64_t
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data)
+void test_svstnt1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_f16,,)(pg, base, vnum, data);
 }
@@ -379,7 +387,7 @@ void test_svstnt1_vnum_f16(svbool_t pg, float16_t *base, int64_t vnum, svfloat16
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data)
+void test_svstnt1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_f32,,)(pg, base, vnum, data);
 }
@@ -398,7 +406,7 @@ void test_svstnt1_vnum_f32(svbool_t pg, float32_t *base, int64_t vnum, svfloat32
 // CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i1> [[TMP0]], ptr [[TMP1]])
 // CPP-CHECK-NEXT:    ret void
 //
-void test_svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data)
+void test_svstnt1_vnum_f64(svbool_t pg, float64_t *base, int64_t vnum, svfloat64_t data) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svstnt1_vnum,_f64,,)(pg, base, vnum, data);
 }
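
The same relaxation applies beyond loads and stores; the next file covers predicated arithmetic. For instance (illustrative only, not part of the patch), a zeroing subtract can now be used from a streaming function built with +sme,+nosve:

  // Illustrative sketch under +sme without +sve; the caller must be streaming.
  #include <arm_sve.h>

  svint32_t sub_streaming(svbool_t pg, svint32_t a, svint32_t b) __arm_streaming {
    return svsub_s32_z(pg, a, b);   // zeroing-predicated subtract
  }
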
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sub.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sub.c
index 7f5c8aae031d5..383ca5b519e9d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sub.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sub.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svsub_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svsub_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svsub_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svsub_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svsub_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svsub_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svsub_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svsub_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svsub_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svsub_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svsub_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svsub_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svsub_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svsub_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svsub_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svsub_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svsub_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svsub_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svsub_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svsub_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svsub_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svsub_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svsub_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svsub_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsub_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svsub_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svsub_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsub_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svsub_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svsub_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsub_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svsub_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svsub_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsub_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svsub_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svsub_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsub_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svsub_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svsub_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsub_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svsub_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svsub_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsub_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svsub_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svsub_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsub_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svsub_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svsub_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsub_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svsub_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svsub_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svsub_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svsub_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svsub_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svsub_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svsub_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svsub_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svsub_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svsub_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svsub_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svsub_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svsub_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svsub_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svsub_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svsub_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svsub_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svsub_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svsub_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svsub_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svsub_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svsub_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svsub_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svsub_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svsub_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svsub_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsub_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svsub_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svsub_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsub_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svsub_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svsub_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsub_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svsub_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svsub_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsub_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svsub_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svsub_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsub_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svsub_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svsub_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsub_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svsub_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svsub_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsub_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svsub_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svsub_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsub_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svsub_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svsub_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_u64,_x,)(pg, op1, op2);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svsub_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svsub_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f16,_z,)(pg, op1, op2);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svsub_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svsub_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svsub_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f32,_z,)(pg, op1, op2);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svsub_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svsub_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svsub_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f64,_z,)(pg, op1, op2);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svsub_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsub_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svsub_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f16,_m,)(pg, op1, op2);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svsub_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsub_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svsub_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f32,_m,)(pg, op1, op2);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svsub_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsub_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svsub_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f64,_m,)(pg, op1, op2);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svsub_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsub_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svsub_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f16,_x,)(pg, op1, op2);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svsub_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsub_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svsub_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f32,_x,)(pg, op1, op2);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svsub_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsub_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svsub_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_f64,_x,)(pg, op1, op2);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svsub_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svsub_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svsub_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f16,_z,)(pg, op1, op2);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svsub_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svsub_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svsub_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f32,_z,)(pg, op1, op2);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svsub_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svsub_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svsub_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f64,_z,)(pg, op1, op2);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svsub_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsub_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svsub_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f16,_m,)(pg, op1, op2);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svsub_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsub_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svsub_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f32,_m,)(pg, op1, op2);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svsub_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsub_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svsub_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f64,_m,)(pg, op1, op2);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svsub_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsub_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svsub_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f16,_x,)(pg, op1, op2);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svsub_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsub_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svsub_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f32,_x,)(pg, op1, op2);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svsub_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsub_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svsub_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsub,_n_f64,_x,)(pg, op1, op2);
 }
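
(Aside, not part of the patch: each updated test defines a MODE_ATTR macro, shown in the next file's preamble, that expands to __arm_streaming whenever __ARM_FEATURE_SME is set, so the same source doubles as a plain SVE test and a streaming-mode SME test. A minimal standalone sketch of the pattern these RUN lines exercise, assuming a clang with SME support and an assumed invocation such as `clang -target aarch64-none-elf -march=armv9-a+sme+nosve -c sketch.c`:

    // Sketch only; streaming_sub is a hypothetical example, not from the patch.
    #include <arm_sve.h>

    // Inside a streaming function, streaming-compatible SVE intrinsics such as
    // svsub_s32_x are accepted even when +sve itself is disabled, which is
    // exactly what MODE_ATTR arranges for every test body below.
    svint32_t streaming_sub(svbool_t pg, svint32_t a, svint32_t b) __arm_streaming {
      return svsub_s32_x(pg, a, b);
    }

The non-streaming RUN lines leave MODE_ATTR empty, so the declarations below are unchanged in the +sve configuration.)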
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_subr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_subr.c
index d34108c323bde..f7d576b041b7f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_subr.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_subr.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -26,7 +34,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svsubr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svsubr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s8,_z,)(pg, op1, op2);
 }
@@ -45,7 +53,7 @@ svint8_t test_svsubr_s8_z(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svsubr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svsubr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s16,_z,)(pg, op1, op2);
 }
@@ -64,7 +72,7 @@ svint16_t test_svsubr_s16_z(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svsubr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svsubr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s32,_z,)(pg, op1, op2);
 }
@@ -83,7 +91,7 @@ svint32_t test_svsubr_s32_z(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svsubr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svsubr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s64,_z,)(pg, op1, op2);
 }
@@ -100,7 +108,7 @@ svint64_t test_svsubr_s64_z(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svsubr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svsubr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u8,_z,)(pg, op1, op2);
 }
@@ -119,7 +127,7 @@ svuint8_t test_svsubr_u8_z(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svsubr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svsubr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u16,_z,)(pg, op1, op2);
 }
@@ -138,7 +146,7 @@ svuint16_t test_svsubr_u16_z(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svsubr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svsubr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u32,_z,)(pg, op1, op2);
 }
@@ -157,7 +165,7 @@ svuint32_t test_svsubr_u32_z(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svsubr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svsubr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u64,_z,)(pg, op1, op2);
 }
@@ -172,7 +180,7 @@ svuint64_t test_svsubr_u64_z(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsubr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svsubr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s8,_m,)(pg, op1, op2);
 }
@@ -189,7 +197,7 @@ svint8_t test_svsubr_s8_m(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsubr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svsubr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s16,_m,)(pg, op1, op2);
 }
@@ -206,7 +214,7 @@ svint16_t test_svsubr_s16_m(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsubr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svsubr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s32,_m,)(pg, op1, op2);
 }
@@ -223,7 +231,7 @@ svint32_t test_svsubr_s32_m(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsubr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svsubr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s64,_m,)(pg, op1, op2);
 }
@@ -238,7 +246,7 @@ svint64_t test_svsubr_s64_m(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsubr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svsubr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u8,_m,)(pg, op1, op2);
 }
@@ -255,7 +263,7 @@ svuint8_t test_svsubr_u8_m(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsubr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svsubr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u16,_m,)(pg, op1, op2);
 }
@@ -272,7 +280,7 @@ svuint16_t test_svsubr_u16_m(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsubr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svsubr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u32,_m,)(pg, op1, op2);
 }
@@ -289,7 +297,7 @@ svuint32_t test_svsubr_u32_m(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsubr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svsubr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u64,_m,)(pg, op1, op2);
 }
@@ -304,7 +312,7 @@ svuint64_t test_svsubr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
+svint8_t test_svsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s8,_x,)(pg, op1, op2);
 }
@@ -321,7 +329,7 @@ svint8_t test_svsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
+svint16_t test_svsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s16,_x,)(pg, op1, op2);
 }
@@ -338,7 +346,7 @@ svint16_t test_svsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
+svint32_t test_svsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s32,_x,)(pg, op1, op2);
 }
@@ -355,7 +363,7 @@ svint32_t test_svsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
+svint64_t test_svsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_s64,_x,)(pg, op1, op2);
 }
@@ -370,7 +378,7 @@ svint64_t test_svsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
+svuint8_t test_svsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u8,_x,)(pg, op1, op2);
 }
@@ -387,7 +395,7 @@ svuint8_t test_svsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
+svuint16_t test_svsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u16,_x,)(pg, op1, op2);
 }
@@ -404,7 +412,7 @@ svuint16_t test_svsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
+svuint32_t test_svsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u32,_x,)(pg, op1, op2);
 }
@@ -421,7 +429,7 @@ svuint32_t test_svsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsubr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
+svuint64_t test_svsubr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_u64,_x,)(pg, op1, op2);
 }
@@ -442,7 +450,7 @@ svuint64_t test_svsubr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svint8_t test_svsubr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svsubr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s8,_z,)(pg, op1, op2);
 }
@@ -465,7 +473,7 @@ svint8_t test_svsubr_n_s8_z(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svint16_t test_svsubr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svsubr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s16,_z,)(pg, op1, op2);
 }
@@ -488,7 +496,7 @@ svint16_t test_svsubr_n_s16_z(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svint32_t test_svsubr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svsubr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s32,_z,)(pg, op1, op2);
 }
@@ -511,7 +519,7 @@ svint32_t test_svsubr_n_s32_z(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svint64_t test_svsubr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svsubr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s64,_z,)(pg, op1, op2);
 }
@@ -532,7 +540,7 @@ svint64_t test_svsubr_n_s64_z(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> [[PG]], <vscale x 16 x i8> [[TMP0]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
-svuint8_t test_svsubr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svsubr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u8,_z,)(pg, op1, op2);
 }
@@ -555,7 +563,7 @@ svuint8_t test_svsubr_n_u8_z(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[TMP1]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP2]]
 //
-svuint16_t test_svsubr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svsubr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u16,_z,)(pg, op1, op2);
 }
@@ -578,7 +586,7 @@ svuint16_t test_svsubr_n_u16_z(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 //
-svuint32_t test_svsubr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svsubr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u32,_z,)(pg, op1, op2);
 }
@@ -601,7 +609,7 @@ svuint32_t test_svsubr_n_u32_z(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[TMP1]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
 //
-svuint64_t test_svsubr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svsubr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u64,_z,)(pg, op1, op2);
 }
@@ -620,7 +628,7 @@ svuint64_t test_svsubr_n_u64_z(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsubr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svsubr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s8,_m,)(pg, op1, op2);
 }
@@ -641,7 +649,7 @@ svint8_t test_svsubr_n_s8_m(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsubr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svsubr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s16,_m,)(pg, op1, op2);
 }
@@ -662,7 +670,7 @@ svint16_t test_svsubr_n_s16_m(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsubr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svsubr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s32,_m,)(pg, op1, op2);
 }
@@ -683,7 +691,7 @@ svint32_t test_svsubr_n_s32_m(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsubr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svsubr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s64,_m,)(pg, op1, op2);
 }
@@ -702,7 +710,7 @@ svint64_t test_svsubr_n_s64_m(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.subr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsubr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svsubr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u8,_m,)(pg, op1, op2);
 }
@@ -723,7 +731,7 @@ svuint8_t test_svsubr_n_u8_m(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.subr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsubr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svsubr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u16,_m,)(pg, op1, op2);
 }
@@ -744,7 +752,7 @@ svuint16_t test_svsubr_n_u16_m(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsubr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svsubr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u32,_m,)(pg, op1, op2);
 }
@@ -765,7 +773,7 @@ svuint32_t test_svsubr_n_u32_m(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.subr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsubr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svsubr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u64,_m,)(pg, op1, op2);
 }
@@ -784,7 +792,7 @@ svuint64_t test_svsubr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
+svint8_t test_svsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s8,_x,)(pg, op1, op2);
 }
@@ -805,7 +813,7 @@ svint8_t test_svsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svint16_t test_svsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
+svint16_t test_svsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s16,_x,)(pg, op1, op2);
 }
@@ -826,7 +834,7 @@ svint16_t test_svsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svint32_t test_svsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
+svint32_t test_svsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s32,_x,)(pg, op1, op2);
 }
@@ -847,7 +855,7 @@ svint32_t test_svsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svint64_t test_svsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
+svint64_t test_svsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_s64,_x,)(pg, op1, op2);
 }
@@ -866,7 +874,7 @@ svint64_t test_svsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
+svuint8_t test_svsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u8,_x,)(pg, op1, op2);
 }
@@ -887,7 +895,7 @@ svuint8_t test_svsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
-svuint16_t test_svsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
+svuint16_t test_svsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u16,_x,)(pg, op1, op2);
 }
@@ -908,7 +916,7 @@ svuint16_t test_svsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
-svuint32_t test_svsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
+svuint32_t test_svsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u32,_x,)(pg, op1, op2);
 }
@@ -929,7 +937,7 @@ svuint32_t test_svsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
-svuint64_t test_svsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
+svuint64_t test_svsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_u64,_x,)(pg, op1, op2);
 }
@@ -948,7 +956,7 @@ svuint64_t test_svsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svsubr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svsubr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f16,_z,)(pg, op1, op2);
 }
@@ -967,7 +975,7 @@ svfloat16_t test_svsubr_f16_z(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svsubr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svsubr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f32,_z,)(pg, op1, op2);
 }
@@ -986,7 +994,7 @@ svfloat32_t test_svsubr_f32_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svsubr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svsubr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f64,_z,)(pg, op1, op2);
 }
@@ -1003,7 +1011,7 @@ svfloat64_t test_svsubr_f64_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsubr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svsubr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f16,_m,)(pg, op1, op2);
 }
@@ -1020,7 +1028,7 @@ svfloat16_t test_svsubr_f16_m(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsubr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svsubr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f32,_m,)(pg, op1, op2);
 }
@@ -1037,7 +1045,7 @@ svfloat32_t test_svsubr_f32_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsubr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svsubr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f64,_m,)(pg, op1, op2);
 }
@@ -1054,7 +1062,7 @@ svfloat64_t test_svsubr_f64_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP2:%.*]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsubr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svsubr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f16,_x,)(pg, op1, op2);
 }
@@ -1071,7 +1079,7 @@ svfloat16_t test_svsubr_f16_x(svbool_t pg, svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP2:%.*]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsubr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svsubr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f32,_x,)(pg, op1, op2);
 }
@@ -1088,7 +1096,7 @@ svfloat32_t test_svsubr_f32_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP2:%.*]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsubr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svsubr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_f64,_x,)(pg, op1, op2);
 }
@@ -1111,7 +1119,7 @@ svfloat64_t test_svsubr_f64_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 //
-svfloat16_t test_svsubr_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svsubr_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f16,_z,)(pg, op1, op2);
 }
@@ -1134,7 +1142,7 @@ svfloat16_t test_svsubr_n_f16_z(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 //
-svfloat32_t test_svsubr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svsubr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f32,_z,)(pg, op1, op2);
 }
@@ -1157,7 +1165,7 @@ svfloat32_t test_svsubr_n_f32_z(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 //
-svfloat64_t test_svsubr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svsubr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f64,_z,)(pg, op1, op2);
 }
@@ -1178,7 +1186,7 @@ svfloat64_t test_svsubr_n_f64_z(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsubr_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svsubr_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f16,_m,)(pg, op1, op2);
 }
@@ -1199,7 +1207,7 @@ svfloat16_t test_svsubr_n_f16_m(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsubr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svsubr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f32,_m,)(pg, op1, op2);
 }
@@ -1220,7 +1228,7 @@ svfloat32_t test_svsubr_n_f32_m(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsubr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svsubr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f64,_m,)(pg, op1, op2);
 }
@@ -1241,7 +1249,7 @@ svfloat64_t test_svsubr_n_f64_m(svbool_t pg, svfloat64_t op1, float64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[DOTSPLAT]], <vscale x 8 x half> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
-svfloat16_t test_svsubr_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
+svfloat16_t test_svsubr_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f16,_x,)(pg, op1, op2);
 }
@@ -1262,7 +1270,7 @@ svfloat16_t test_svsubr_n_f16_x(svbool_t pg, svfloat16_t op1, float16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[DOTSPLAT]], <vscale x 4 x float> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
-svfloat32_t test_svsubr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
+svfloat32_t test_svsubr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f32,_x,)(pg, op1, op2);
 }
@@ -1283,7 +1291,7 @@ svfloat32_t test_svsubr_n_f32_x(svbool_t pg, svfloat32_t op1, float32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[DOTSPLAT]], <vscale x 2 x double> [[OP1:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
-svfloat64_t test_svsubr_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2)
+svfloat64_t test_svsubr_n_f64_x(svbool_t pg, svfloat64_t op1, float64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svsubr,_n_f64,_x,)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c
index 4066c71c9ec32..b21a72024fd9f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_sudot.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-
 // RUN: %clang_cc1 -target-feature +i8mm -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -target-feature +i8mm -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -target-feature +i8mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -target-feature +i8mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +i8mm -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +i8mm -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
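+// MODE_ATTR expands to __arm_streaming when __ARM_FEATURE_SME is defined
+// (the +sme RUN line above), so each test below also exercises the
+// intrinsic from a streaming function; with +sve it expands to nothing.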
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Z:%.*]], <vscale x 16 x i8> [[Y:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svsudot_s32(svint32_t x, svint8_t y, svuint8_t z) {
+svint32_t test_svsudot_s32(svint32_t x, svint8_t y, svuint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svsudot, _s32, , )(x, y, z);
 }
 
@@ -43,7 +51,7 @@ svint32_t test_svsudot_s32(svint32_t x, svint8_t y, svuint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[Y:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svsudot_n_s32(svint32_t x, svint8_t y, uint8_t z) {
+svint32_t test_svsudot_n_s32(svint32_t x, svint8_t y, uint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svsudot, _n_s32, , )(x, y, z);
 }
 
@@ -57,7 +65,7 @@ svint32_t test_svsudot_n_s32(svint32_t x, svint8_t y, uint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svsudot_lane_s32_0(svint32_t x, svint8_t y, svuint8_t z) {
+svint32_t test_svsudot_lane_s32_0(svint32_t x, svint8_t y, svuint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 0);
 }
 
@@ -71,7 +79,7 @@ svint32_t test_svsudot_lane_s32_0(svint32_t x, svint8_t y, svuint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svsudot_lane_s32_1(svint32_t x, svint8_t y, svuint8_t z) {
+svint32_t test_svsudot_lane_s32_1(svint32_t x, svint8_t y, svuint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 1);
 }
 
@@ -85,7 +93,7 @@ svint32_t test_svsudot_lane_s32_1(svint32_t x, svint8_t y, svuint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svsudot_lane_s32_2(svint32_t x, svint8_t y, svuint8_t z) {
+svint32_t test_svsudot_lane_s32_2(svint32_t x, svint8_t y, svuint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 2);
 }
 
@@ -99,6 +107,6 @@ svint32_t test_svsudot_lane_s32_2(svint32_t x, svint8_t y, svuint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sudot.lane.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svsudot_lane_s32_3(svint32_t x, svint8_t y, svuint8_t z) {
+svint32_t test_svsudot_lane_s32_3(svint32_t x, svint8_t y, svuint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svsudot_lane, _s32, , )(x, y, z, 3);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c
index 4e1f7cb709eef..d4b6b6842fb9a 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c
@@ -6,8 +6,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> [[DATA:%.*]], <vscale x 8 x i16> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svtbl_bf16(svbfloat16_t data, svuint16_t indices) {
+svbfloat16_t test_svtbl_bf16(svbfloat16_t data, svuint16_t indices) MODE_ATTR {
 // expected-warning@+1 {{implicit declaration of function 'svtbl_bf16'}}
   return SVE_ACLE_FUNC(svtbl, _bf16, , )(data, indices);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl.c
index e8daca4c600d5..89fa47b5f7974 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svtbl_s8(svint8_t data, svuint8_t indices)
+svint8_t test_svtbl_s8(svint8_t data, svuint8_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_s8,,)(data, indices);
 }
@@ -39,7 +47,7 @@ svint8_t test_svtbl_s8(svint8_t data, svuint8_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svtbl_s16(svint16_t data, svuint16_t indices)
+svint16_t test_svtbl_s16(svint16_t data, svuint16_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_s16,,)(data, indices);
 }
@@ -54,7 +62,7 @@ svint16_t test_svtbl_s16(svint16_t data, svuint16_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svtbl_s32(svint32_t data, svuint32_t indices)
+svint32_t test_svtbl_s32(svint32_t data, svuint32_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_s32,,)(data, indices);
 }
@@ -69,7 +77,7 @@ svint32_t test_svtbl_s32(svint32_t data, svuint32_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svtbl_s64(svint64_t data, svuint64_t indices)
+svint64_t test_svtbl_s64(svint64_t data, svuint64_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_s64,,)(data, indices);
 }
@@ -84,7 +92,7 @@ svint64_t test_svtbl_s64(svint64_t data, svuint64_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tbl.nxv16i8(<vscale x 16 x i8> [[DATA:%.*]], <vscale x 16 x i8> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svtbl_u8(svuint8_t data, svuint8_t indices)
+svuint8_t test_svtbl_u8(svuint8_t data, svuint8_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_u8,,)(data, indices);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svtbl_u8(svuint8_t data, svuint8_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tbl.nxv8i16(<vscale x 8 x i16> [[DATA:%.*]], <vscale x 8 x i16> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svtbl_u16(svuint16_t data, svuint16_t indices)
+svuint16_t test_svtbl_u16(svuint16_t data, svuint16_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_u16,,)(data, indices);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svtbl_u16(svuint16_t data, svuint16_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tbl.nxv4i32(<vscale x 4 x i32> [[DATA:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svtbl_u32(svuint32_t data, svuint32_t indices)
+svuint32_t test_svtbl_u32(svuint32_t data, svuint32_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_u32,,)(data, indices);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svtbl_u32(svuint32_t data, svuint32_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tbl.nxv2i64(<vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svtbl_u64(svuint64_t data, svuint64_t indices)
+svuint64_t test_svtbl_u64(svuint64_t data, svuint64_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_u64,,)(data, indices);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svtbl_u64(svuint64_t data, svuint64_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.tbl.nxv8f16(<vscale x 8 x half> [[DATA:%.*]], <vscale x 8 x i16> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svtbl_f16(svfloat16_t data, svuint16_t indices)
+svfloat16_t test_svtbl_f16(svfloat16_t data, svuint16_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_f16,,)(data, indices);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svtbl_f16(svfloat16_t data, svuint16_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.tbl.nxv4f32(<vscale x 4 x float> [[DATA:%.*]], <vscale x 4 x i32> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svtbl_f32(svfloat32_t data, svuint32_t indices)
+svfloat32_t test_svtbl_f32(svfloat32_t data, svuint32_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_f32,,)(data, indices);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svtbl_f32(svfloat32_t data, svuint32_t indices)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.tbl.nxv2f64(<vscale x 2 x double> [[DATA:%.*]], <vscale x 2 x i64> [[INDICES:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svtbl_f64(svfloat64_t data, svuint64_t indices)
+svfloat64_t test_svtbl_f64(svfloat64_t data, svuint64_t indices) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtbl,_f64,,)(data, indices);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1-bfloat.c
index 6fce83b6c0467..a6c6dcc571e28 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn1.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svtrn1_bf16(svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svtrn1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_bf16,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c
index 70f97236bab80..f6d8ff770c600 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn1.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svtrn1_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svtrn1_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svtrn1_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svtrn1_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svtrn1_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svtrn1_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svtrn1_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svtrn1_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svtrn1_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svtrn1_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svtrn1_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svtrn1_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.trn1.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svtrn1_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svtrn1_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svtrn1_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.trn1.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svtrn1_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svtrn1_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svtrn1_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.trn1.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svtrn1_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svtrn1_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svtrn1_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.trn1.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svtrn1_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svtrn1_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_u64,,)(op1, op2);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svtrn1_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.trn1.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svtrn1_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svtrn1_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_f16,,)(op1, op2);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svtrn1_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.trn1.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svtrn1_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svtrn1_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_f32,,)(op1, op2);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svtrn1_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.trn1.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svtrn1_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svtrn1_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn1,_f64,,)(op1, op2);
 }
@@ -189,7 +197,7 @@ svfloat64_t test_svtrn1_f64(svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.nxv16i1(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svtrn1_b8(svbool_t op1, svbool_t op2)
+svbool_t test_svtrn1_b8(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svtrn1_b8(op1, op2);
 }
@@ -204,7 +212,7 @@ svbool_t test_svtrn1_b8(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svtrn1_b16(svbool_t op1, svbool_t op2)
+svbool_t test_svtrn1_b16(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svtrn1_b16(op1, op2);
 }
@@ -219,7 +227,7 @@ svbool_t test_svtrn1_b16(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svtrn1_b32(svbool_t op1, svbool_t op2)
+svbool_t test_svtrn1_b32(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svtrn1_b32(op1, op2);
 }
@@ -234,7 +242,7 @@ svbool_t test_svtrn1_b32(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svtrn1_b64(svbool_t op1, svbool_t op2)
+svbool_t test_svtrn1_b64(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svtrn1_b64(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2-bfloat.c
index fb07f3c911b1f..87063ac69dfac 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.trn2.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svtrn2_bf16(svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svtrn2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_bf16,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c
index 0bf72c9198206..9442142bc097f 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_trn2.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svtrn2_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svtrn2_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svtrn2_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svtrn2_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svtrn2_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svtrn2_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svtrn2_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svtrn2_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svtrn2_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svtrn2_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svtrn2_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svtrn2_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.trn2.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svtrn2_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svtrn2_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svtrn2_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.trn2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svtrn2_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svtrn2_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svtrn2_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.trn2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svtrn2_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svtrn2_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svtrn2_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.trn2.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svtrn2_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svtrn2_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_u64,,)(op1, op2);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svtrn2_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.trn2.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svtrn2_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svtrn2_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_f16,,)(op1, op2);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svtrn2_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.trn2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svtrn2_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svtrn2_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_f32,,)(op1, op2);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svtrn2_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.trn2.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svtrn2_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svtrn2_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svtrn2,_f64,,)(op1, op2);
 }
@@ -189,7 +197,7 @@ svfloat64_t test_svtrn2_f64(svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.nxv16i1(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svtrn2_b8(svbool_t op1, svbool_t op2)
+svbool_t test_svtrn2_b8(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svtrn2_b8(op1, op2);
 }
@@ -204,7 +212,7 @@ svbool_t test_svtrn2_b8(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svtrn2_b16(svbool_t op1, svbool_t op2)
+svbool_t test_svtrn2_b16(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svtrn2_b16(op1, op2);
 }
@@ -219,7 +227,7 @@ svbool_t test_svtrn2_b16(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svtrn2_b32(svbool_t op1, svbool_t op2)
+svbool_t test_svtrn2_b32(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svtrn2_b32(op1, op2);
 }
@@ -234,7 +242,7 @@ svbool_t test_svtrn2_b32(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.trn2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svtrn2_b64(svbool_t op1, svbool_t op2)
+svbool_t test_svtrn2_b64(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svtrn2_b64(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
index 4a2f512c95ee9..b15028c4b2629 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef-bfloat.c
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 // CHECK-LABEL: @test_svundef_bf16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret <vscale x 8 x bfloat> undef
@@ -14,7 +22,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> undef
 //
-svbfloat16_t test_svundef_bf16()
+svbfloat16_t test_svundef_bf16(void) MODE_ATTR
 {
   return svundef_bf16();
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef.c
index d9eea5ab04133..7daeb7489e5c4 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef.c
@@ -3,8 +3,16 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 // CHECK-LABEL: @test_svundef_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret <vscale x 16 x i8> undef
@@ -13,7 +21,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> undef
 //
-svint8_t test_svundef_s8()
+svint8_t test_svundef_s8(void) MODE_ATTR
 {
   return svundef_s8();
 }
@@ -26,7 +34,7 @@ svint8_t test_svundef_s8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> undef
 //
-svint16_t test_svundef_s16()
+svint16_t test_svundef_s16(void) MODE_ATTR
 {
   return svundef_s16();
 }
@@ -39,7 +47,7 @@ svint16_t test_svundef_s16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> undef
 //
-svint32_t test_svundef_s32()
+svint32_t test_svundef_s32(void) MODE_ATTR
 {
   return svundef_s32();
 }
@@ -52,7 +60,7 @@ svint32_t test_svundef_s32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> undef
 //
-svint64_t test_svundef_s64()
+svint64_t test_svundef_s64(void) MODE_ATTR
 {
   return svundef_s64();
 }
@@ -65,7 +73,7 @@ svint64_t test_svundef_s64()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> undef
 //
-svuint8_t test_svundef_u8()
+svuint8_t test_svundef_u8(void) MODE_ATTR
 {
   return svundef_u8();
 }
@@ -78,7 +86,7 @@ svuint8_t test_svundef_u8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> undef
 //
-svuint16_t test_svundef_u16()
+svuint16_t test_svundef_u16(void) MODE_ATTR
 {
   return svundef_u16();
 }
@@ -91,7 +99,7 @@ svuint16_t test_svundef_u16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> undef
 //
-svuint32_t test_svundef_u32()
+svuint32_t test_svundef_u32(void) MODE_ATTR
 {
   return svundef_u32();
 }
@@ -104,7 +112,7 @@ svuint32_t test_svundef_u32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> undef
 //
-svuint64_t test_svundef_u64()
+svuint64_t test_svundef_u64(void) MODE_ATTR
 {
   return svundef_u64();
 }
@@ -117,7 +125,7 @@ svuint64_t test_svundef_u64()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> undef
 //
-svfloat16_t test_svundef_f16()
+svfloat16_t test_svundef_f16(void) MODE_ATTR
 {
   return svundef_f16();
 }
@@ -130,7 +138,7 @@ svfloat16_t test_svundef_f16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> undef
 //
-svfloat32_t test_svundef_f32()
+svfloat32_t test_svundef_f32(void) MODE_ATTR
 {
   return svundef_f32();
 }
@@ -143,7 +151,7 @@ svfloat32_t test_svundef_f32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> undef
 //
-svfloat64_t test_svundef_f64()
+svfloat64_t test_svundef_f64(void) MODE_ATTR
 {
   return svundef_f64();
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
index 98ae82dc4909a..dcaded8967fd0 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2-bfloat.c
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 // CHECK-LABEL: @test_svundef2_bf16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret <vscale x 16 x bfloat> undef
@@ -14,7 +22,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x bfloat> undef
 //
-svbfloat16x2_t test_svundef2_bf16()
+svbfloat16x2_t test_svundef2_bf16(void) MODE_ATTR
 {
   return svundef2_bf16();
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c
index 5f9471d42b5d0..677e338879c00 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef2.c
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 // CHECK-LABEL: @test_svundef2_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret <vscale x 32 x i8> undef
@@ -14,7 +22,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i8> undef
 //
-svint8x2_t test_svundef2_s8()
+svint8x2_t test_svundef2_s8(void) MODE_ATTR
 {
   return svundef2_s8();
 }
@@ -27,7 +35,7 @@ svint8x2_t test_svundef2_s8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i16> undef
 //
-svint16x2_t test_svundef2_s16()
+svint16x2_t test_svundef2_s16(void) MODE_ATTR
 {
   return svundef2_s16();
 }
@@ -40,7 +48,7 @@ svint16x2_t test_svundef2_s16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i32> undef
 //
-svint32x2_t test_svundef2_s32()
+svint32x2_t test_svundef2_s32(void) MODE_ATTR
 {
   return svundef2_s32();
 }
@@ -53,7 +61,7 @@ svint32x2_t test_svundef2_s32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i64> undef
 //
-svint64x2_t test_svundef2_s64()
+svint64x2_t test_svundef2_s64(void) MODE_ATTR
 {
   return svundef2_s64();
 }
@@ -66,7 +74,7 @@ svint64x2_t test_svundef2_s64()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i8> undef
 //
-svuint8x2_t test_svundef2_u8()
+svuint8x2_t test_svundef2_u8(void) MODE_ATTR
 {
   return svundef2_u8();
 }
@@ -79,7 +87,7 @@ svuint8x2_t test_svundef2_u8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i16> undef
 //
-svuint16x2_t test_svundef2_u16()
+svuint16x2_t test_svundef2_u16(void) MODE_ATTR
 {
   return svundef2_u16();
 }
@@ -92,7 +100,7 @@ svuint16x2_t test_svundef2_u16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i32> undef
 //
-svuint32x2_t test_svundef2_u32()
+svuint32x2_t test_svundef2_u32(void) MODE_ATTR
 {
   return svundef2_u32();
 }
@@ -105,7 +113,7 @@ svuint32x2_t test_svundef2_u32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i64> undef
 //
-svuint64x2_t test_svundef2_u64()
+svuint64x2_t test_svundef2_u64(void) MODE_ATTR
 {
   return svundef2_u64();
 }
@@ -118,7 +126,7 @@ svuint64x2_t test_svundef2_u64()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x half> undef
 //
-svfloat16x2_t test_svundef2_f16()
+svfloat16x2_t test_svundef2_f16(void) MODE_ATTR
 {
   return svundef2_f16();
 }
@@ -131,7 +139,7 @@ svfloat16x2_t test_svundef2_f16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x float> undef
 //
-svfloat32x2_t test_svundef2_f32()
+svfloat32x2_t test_svundef2_f32(void) MODE_ATTR
 {
   return svundef2_f32();
 }
@@ -144,7 +152,7 @@ svfloat32x2_t test_svundef2_f32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 4 x double> undef
 //
-svfloat64x2_t test_svundef2_f64()
+svfloat64x2_t test_svundef2_f64(void) MODE_ATTR
 {
   return svundef2_f64();
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
index cc02e9a3c76d0..223340095addd 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3-bfloat.c
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 // CHECK-LABEL: @test_svundef3_bf16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret <vscale x 24 x bfloat> undef
@@ -14,7 +22,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 24 x bfloat> undef
 //
-svbfloat16x3_t test_svundef3_bf16()
+svbfloat16x3_t test_svundef3_bf16(void) MODE_ATTR
 {
   return svundef3_bf16();
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c
index e4b3a5e6860a9..7104f21b75914 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef3.c
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 // CHECK-LABEL: @test_svundef3_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret <vscale x 48 x i8> undef
@@ -14,7 +22,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 48 x i8> undef
 //
-svint8x3_t test_svundef3_s8()
+svint8x3_t test_svundef3_s8(void) MODE_ATTR
 {
   return svundef3_s8();
 }
@@ -27,7 +35,7 @@ svint8x3_t test_svundef3_s8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 24 x i16> undef
 //
-svint16x3_t test_svundef3_s16()
+svint16x3_t test_svundef3_s16(void) MODE_ATTR
 {
   return svundef3_s16();
 }
@@ -40,7 +48,7 @@ svint16x3_t test_svundef3_s16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 12 x i32> undef
 //
-svint32x3_t test_svundef3_s32()
+svint32x3_t test_svundef3_s32(void) MODE_ATTR
 {
   return svundef3_s32();
 }
@@ -53,7 +61,7 @@ svint32x3_t test_svundef3_s32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 6 x i64> undef
 //
-svint64x3_t test_svundef3_s64()
+svint64x3_t test_svundef3_s64(void) MODE_ATTR
 {
   return svundef3_s64();
 }
@@ -66,7 +74,7 @@ svint64x3_t test_svundef3_s64()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 48 x i8> undef
 //
-svuint8x3_t test_svundef3_u8()
+svuint8x3_t test_svundef3_u8(void) MODE_ATTR
 {
   return svundef3_u8();
 }
@@ -79,7 +87,7 @@ svuint8x3_t test_svundef3_u8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 24 x i16> undef
 //
-svuint16x3_t test_svundef3_u16()
+svuint16x3_t test_svundef3_u16(void) MODE_ATTR
 {
   return svundef3_u16();
 }
@@ -92,7 +100,7 @@ svuint16x3_t test_svundef3_u16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 12 x i32> undef
 //
-svuint32x3_t test_svundef3_u32()
+svuint32x3_t test_svundef3_u32(void) MODE_ATTR
 {
   return svundef3_u32();
 }
@@ -105,7 +113,7 @@ svuint32x3_t test_svundef3_u32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 6 x i64> undef
 //
-svuint64x3_t test_svundef3_u64()
+svuint64x3_t test_svundef3_u64(void) MODE_ATTR
 {
   return svundef3_u64();
 }
@@ -118,7 +126,7 @@ svuint64x3_t test_svundef3_u64()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 24 x half> undef
 //
-svfloat16x3_t test_svundef3_f16()
+svfloat16x3_t test_svundef3_f16(void) MODE_ATTR
 {
   return svundef3_f16();
 }
@@ -131,7 +139,7 @@ svfloat16x3_t test_svundef3_f16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 12 x float> undef
 //
-svfloat32x3_t test_svundef3_f32()
+svfloat32x3_t test_svundef3_f32(void) MODE_ATTR
 {
   return svundef3_f32();
 }
@@ -144,7 +152,7 @@ svfloat32x3_t test_svundef3_f32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 6 x double> undef
 //
-svfloat64x3_t test_svundef3_f64()
+svfloat64x3_t test_svundef3_f64(void) MODE_ATTR
 {
   return svundef3_f64();
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
index 0c7130af9ba6a..d58e47d510752 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4-bfloat.c
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 // CHECK-LABEL: @test_svundef4_bf16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret <vscale x 32 x bfloat> undef
@@ -14,7 +22,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 32 x bfloat> undef
 //
-svbfloat16x4_t test_svundef4_bf16()
+svbfloat16x4_t test_svundef4_bf16(void) MODE_ATTR
 {
   return svundef4_bf16();
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c
index a574f8454144f..fd736ac0615aa 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_undef4.c
@@ -1,11 +1,19 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -O2 -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 // CHECK-LABEL: @test_svundef4_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret <vscale x 64 x i8> undef
@@ -14,7 +22,7 @@
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 64 x i8> undef
 //
-svint8x4_t test_svundef4_s8()
+svint8x4_t test_svundef4_s8(void) MODE_ATTR
 {
   return svundef4_s8();
 }
@@ -27,7 +35,7 @@ svint8x4_t test_svundef4_s8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i16> undef
 //
-svint16x4_t test_svundef4_s16()
+svint16x4_t test_svundef4_s16(void) MODE_ATTR
 {
   return svundef4_s16();
 }
@@ -40,7 +48,7 @@ svint16x4_t test_svundef4_s16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i32> undef
 //
-svint32x4_t test_svundef4_s32()
+svint32x4_t test_svundef4_s32(void) MODE_ATTR
 {
   return svundef4_s32();
 }
@@ -53,7 +61,7 @@ svint32x4_t test_svundef4_s32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i64> undef
 //
-svint64x4_t test_svundef4_s64()
+svint64x4_t test_svundef4_s64(void) MODE_ATTR
 {
   return svundef4_s64();
 }
@@ -66,7 +74,7 @@ svint64x4_t test_svundef4_s64()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 64 x i8> undef
 //
-svuint8x4_t test_svundef4_u8()
+svuint8x4_t test_svundef4_u8(void) MODE_ATTR
 {
   return svundef4_u8();
 }
@@ -79,7 +87,7 @@ svuint8x4_t test_svundef4_u8()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 32 x i16> undef
 //
-svuint16x4_t test_svundef4_u16()
+svuint16x4_t test_svundef4_u16(void) MODE_ATTR
 {
   return svundef4_u16();
 }
@@ -92,7 +100,7 @@ svuint16x4_t test_svundef4_u16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i32> undef
 //
-svuint32x4_t test_svundef4_u32()
+svuint32x4_t test_svundef4_u32(void) MODE_ATTR
 {
   return svundef4_u32();
 }
@@ -105,7 +113,7 @@ svuint32x4_t test_svundef4_u32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i64> undef
 //
-svuint64x4_t test_svundef4_u64()
+svuint64x4_t test_svundef4_u64(void) MODE_ATTR
 {
   return svundef4_u64();
 }
@@ -118,7 +126,7 @@ svuint64x4_t test_svundef4_u64()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 32 x half> undef
 //
-svfloat16x4_t test_svundef4_f16()
+svfloat16x4_t test_svundef4_f16(void) MODE_ATTR
 {
   return svundef4_f16();
 }
@@ -131,7 +139,7 @@ svfloat16x4_t test_svundef4_f16()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 16 x float> undef
 //
-svfloat32x4_t test_svundef4_f32()
+svfloat32x4_t test_svundef4_f32(void) MODE_ATTR
 {
   return svundef4_f32();
 }
@@ -144,7 +152,7 @@ svfloat32x4_t test_svundef4_f32()
 // CPP-CHECK-NEXT:  entry:
 // CPP-CHECK-NEXT:    ret <vscale x 8 x double> undef
 //
-svfloat64x4_t test_svundef4_f64()
+svfloat64x4_t test_svundef4_f64(void) MODE_ATTR
 {
   return svundef4_f64();
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpklo.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpklo.c
index aaadcb69f2739..90b153a3926a3 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpklo.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_unpklo.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sunpklo.nxv8i16(<vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svunpklo_s16(svint8_t op)
+svint16_t test_svunpklo_s16(svint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svunpklo,_s16,,)(op);
 }
@@ -39,7 +47,7 @@ svint16_t test_svunpklo_s16(svint8_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sunpklo.nxv4i32(<vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svunpklo_s32(svint16_t op)
+svint32_t test_svunpklo_s32(svint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svunpklo,_s32,,)(op);
 }
@@ -54,7 +62,7 @@ svint32_t test_svunpklo_s32(svint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sunpklo.nxv2i64(<vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svunpklo_s64(svint32_t op)
+svint64_t test_svunpklo_s64(svint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svunpklo,_s64,,)(op);
 }
@@ -69,7 +77,7 @@ svint64_t test_svunpklo_s64(svint32_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uunpklo.nxv8i16(<vscale x 16 x i8> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svunpklo_u16(svuint8_t op)
+svuint16_t test_svunpklo_u16(svuint8_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svunpklo,_u16,,)(op);
 }
@@ -84,7 +92,7 @@ svuint16_t test_svunpklo_u16(svuint8_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uunpklo.nxv4i32(<vscale x 8 x i16> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svunpklo_u32(svuint16_t op)
+svuint32_t test_svunpklo_u32(svuint16_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svunpklo,_u32,,)(op);
 }
@@ -99,7 +107,7 @@ svuint32_t test_svunpklo_u32(svuint16_t op)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uunpklo.nxv2i64(<vscale x 4 x i32> [[OP:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svunpklo_u64(svuint32_t op)
+svuint64_t test_svunpklo_u64(svuint32_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svunpklo,_u64,,)(op);
 }
@@ -116,7 +124,7 @@ svuint64_t test_svunpklo_u64(svuint32_t op)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svunpklo_b(svbool_t op)
+svbool_t test_svunpklo_b(svbool_t op) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svunpklo,_b,,)(op);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c
index 2ef08d496e179..c3c22b33d56bc 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_usdot.c
@@ -1,13 +1,20 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-
 // RUN: %clang_cc1 -target-feature +i8mm -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -target-feature +i8mm -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -target-feature +i8mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -target-feature +i8mm -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +i8mm -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +i8mm -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
@@ -25,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svusdot_s32(svint32_t x, svuint8_t y, svint8_t z) {
+svint32_t test_svusdot_s32(svint32_t x, svuint8_t y, svint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svusdot, _s32, , )(x, y, z);
 }
 
@@ -43,7 +50,7 @@ svint32_t test_svusdot_s32(svint32_t x, svuint8_t y, svint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svusdot_n_s32(svint32_t x, svuint8_t y, int8_t z) {
+svint32_t test_svusdot_n_s32(svint32_t x, svuint8_t y, int8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svusdot, _n_s32, , )(x, y, z);
 }
 
@@ -57,7 +64,7 @@ svint32_t test_svusdot_n_s32(svint32_t x, svuint8_t y, int8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]], i32 0)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svusdot_lane_s32_0(svint32_t x, svuint8_t y, svint8_t z) {
+svint32_t test_svusdot_lane_s32_0(svint32_t x, svuint8_t y, svint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 0);
 }
 
@@ -71,7 +78,7 @@ svint32_t test_svusdot_lane_s32_0(svint32_t x, svuint8_t y, svint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]], i32 1)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svusdot_lane_s32_1(svint32_t x, svuint8_t y, svint8_t z) {
+svint32_t test_svusdot_lane_s32_1(svint32_t x, svuint8_t y, svint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 1);
 }
 
@@ -85,7 +92,7 @@ svint32_t test_svusdot_lane_s32_1(svint32_t x, svuint8_t y, svint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]], i32 2)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svusdot_lane_s32_2(svint32_t x, svuint8_t y, svint8_t z) {
+svint32_t test_svusdot_lane_s32_2(svint32_t x, svuint8_t y, svint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 2);
 }
 
@@ -99,6 +106,6 @@ svint32_t test_svusdot_lane_s32_2(svint32_t x, svuint8_t y, svint8_t z) {
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usdot.lane.nxv4i32(<vscale x 4 x i32> [[X:%.*]], <vscale x 16 x i8> [[Y:%.*]], <vscale x 16 x i8> [[Z:%.*]], i32 3)
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svusdot_lane_s32_3(svint32_t x, svuint8_t y, svint8_t z) {
+svint32_t test_svusdot_lane_s32_3(svint32_t x, svuint8_t y, svint8_t z) MODE_ATTR {
   return SVE_ACLE_FUNC(svusdot_lane, _s32, , )(x, y, z, 3);
 }
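
From a user's perspective, the new +sme RUN line above is what this patch
enables: svusdot and friends become usable in a translation unit compiled
with SME but without SVE, provided the caller is a streaming function. A
hedged sketch (driver flags and function name are illustrative; as in the
RUN lines, usdot additionally needs +i8mm):

  /* build (illustrative):
     clang --target=aarch64-linux-gnu -march=armv9-a+sme+i8mm+nosve -c usdot.c */
  #include <arm_sve.h>

  svint32_t dot_acc(svint32_t acc, svuint8_t x, svint8_t y) __arm_streaming
  {
    return svusdot_s32(acc, x, y);
  }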
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1-bfloat.c
index 82e720d254ab2..91863a400480a 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp1.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svuzp1_bf16(svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svuzp1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_bf16,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c
index f5c6268cf5cba..3581c04db0858 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp1.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svuzp1_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svuzp1_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svuzp1_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svuzp1_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svuzp1_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svuzp1_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svuzp1_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svuzp1_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svuzp1_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svuzp1_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svuzp1_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svuzp1_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzp1.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svuzp1_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svuzp1_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svuzp1_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzp1.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svuzp1_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svuzp1_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svuzp1_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svuzp1_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svuzp1_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svuzp1_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzp1.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svuzp1_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svuzp1_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_u64,,)(op1, op2);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svuzp1_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzp1.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svuzp1_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svuzp1_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_f16,,)(op1, op2);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svuzp1_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzp1.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svuzp1_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svuzp1_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_f32,,)(op1, op2);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svuzp1_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzp1.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svuzp1_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svuzp1_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp1,_f64,,)(op1, op2);
 }
@@ -189,7 +197,7 @@ svfloat64_t test_svuzp1_f64(svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.nxv16i1(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svuzp1_b8(svbool_t op1, svbool_t op2)
+svbool_t test_svuzp1_b8(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svuzp1_b8(op1, op2);
 }
@@ -204,7 +212,7 @@ svbool_t test_svuzp1_b8(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svuzp1_b16(svbool_t op1, svbool_t op2)
+svbool_t test_svuzp1_b16(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svuzp1_b16(op1, op2);
 }
@@ -219,7 +227,7 @@ svbool_t test_svuzp1_b16(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svuzp1_b32(svbool_t op1, svbool_t op2)
+svbool_t test_svuzp1_b32(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svuzp1_b32(op1, op2);
 }
@@ -234,7 +242,7 @@ svbool_t test_svuzp1_b32(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svuzp1_b64(svbool_t op1, svbool_t op2)
+svbool_t test_svuzp1_b64(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svuzp1_b64(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2-bfloat.c
index 7bb7a93f81420..83ba97e40d527 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.uzp2.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svuzp2_bf16(svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svuzp2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_bf16,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
index 965d10ea9efc9..959afbd72e090 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_uzp2.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svuzp2_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svuzp2_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svuzp2_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svuzp2_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svuzp2_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svuzp2_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svuzp2_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svuzp2_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svuzp2_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svuzp2_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svuzp2_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svuzp2_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uzp2.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svuzp2_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svuzp2_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svuzp2_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uzp2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svuzp2_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svuzp2_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svuzp2_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uzp2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svuzp2_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svuzp2_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svuzp2_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uzp2.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svuzp2_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svuzp2_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_u64,,)(op1, op2);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svuzp2_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.uzp2.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svuzp2_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svuzp2_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_f16,,)(op1, op2);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svuzp2_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.uzp2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svuzp2_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svuzp2_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_f32,,)(op1, op2);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svuzp2_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.uzp2.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svuzp2_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svuzp2_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svuzp2,_f64,,)(op1, op2);
 }
@@ -189,7 +197,7 @@ svfloat64_t test_svuzp2_f64(svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.nxv16i1(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svuzp2_b8(svbool_t op1, svbool_t op2)
+svbool_t test_svuzp2_b8(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svuzp2_b8(op1, op2);
 }
@@ -204,7 +212,7 @@ svbool_t test_svuzp2_b8(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svuzp2_b16(svbool_t op1, svbool_t op2)
+svbool_t test_svuzp2_b16(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svuzp2_b16(op1, op2);
 }
@@ -219,7 +227,7 @@ svbool_t test_svuzp2_b16(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svuzp2_b32(svbool_t op1, svbool_t op2)
+svbool_t test_svuzp2_b32(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svuzp2_b32(op1, op2);
 }
@@ -234,7 +242,7 @@ svbool_t test_svuzp2_b32(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.uzp2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svuzp2_b64(svbool_t op1, svbool_t op2)
+svbool_t test_svuzp2_b64(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svuzp2_b64(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilele.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilele.c
index eee0096882cf2..1b6e4d5fc9c94 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilele.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilele.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svwhilele_b8_s32(int32_t op1, int32_t op2)
+svbool_t test_svwhilele_b8_s32(int32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b8,_s32,,)(op1, op2);
 }
@@ -41,7 +49,7 @@ svbool_t test_svwhilele_b8_s32(int32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b16_s32(int32_t op1, int32_t op2)
+svbool_t test_svwhilele_b16_s32(int32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b16,_s32,,)(op1, op2);
 }
@@ -58,7 +66,7 @@ svbool_t test_svwhilele_b16_s32(int32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b32_s32(int32_t op1, int32_t op2)
+svbool_t test_svwhilele_b32_s32(int32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b32,_s32,,)(op1, op2);
 }
@@ -75,7 +83,7 @@ svbool_t test_svwhilele_b32_s32(int32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b64_s32(int32_t op1, int32_t op2)
+svbool_t test_svwhilele_b64_s32(int32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b64,_s32,,)(op1, op2);
 }
@@ -90,7 +98,7 @@ svbool_t test_svwhilele_b64_s32(int32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svwhilele_b8_u32(uint32_t op1, uint32_t op2)
+svbool_t test_svwhilele_b8_u32(uint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b8,_u32,,)(op1, op2);
 }
@@ -107,7 +115,7 @@ svbool_t test_svwhilele_b8_u32(uint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b16_u32(uint32_t op1, uint32_t op2)
+svbool_t test_svwhilele_b16_u32(uint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b16,_u32,,)(op1, op2);
 }
@@ -124,7 +132,7 @@ svbool_t test_svwhilele_b16_u32(uint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b32_u32(uint32_t op1, uint32_t op2)
+svbool_t test_svwhilele_b32_u32(uint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b32,_u32,,)(op1, op2);
 }
@@ -141,7 +149,7 @@ svbool_t test_svwhilele_b32_u32(uint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b64_u32(uint32_t op1, uint32_t op2)
+svbool_t test_svwhilele_b64_u32(uint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b64,_u32,,)(op1, op2);
 }
@@ -156,7 +164,7 @@ svbool_t test_svwhilele_b64_u32(uint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilele.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svwhilele_b8_s64(int64_t op1, int64_t op2)
+svbool_t test_svwhilele_b8_s64(int64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b8,_s64,,)(op1, op2);
 }
@@ -173,7 +181,7 @@ svbool_t test_svwhilele_b8_s64(int64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b16_s64(int64_t op1, int64_t op2)
+svbool_t test_svwhilele_b16_s64(int64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b16,_s64,,)(op1, op2);
 }
@@ -190,7 +198,7 @@ svbool_t test_svwhilele_b16_s64(int64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b32_s64(int64_t op1, int64_t op2)
+svbool_t test_svwhilele_b32_s64(int64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b32,_s64,,)(op1, op2);
 }
@@ -207,7 +215,7 @@ svbool_t test_svwhilele_b32_s64(int64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b64_s64(int64_t op1, int64_t op2)
+svbool_t test_svwhilele_b64_s64(int64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b64,_s64,,)(op1, op2);
 }
@@ -222,7 +230,7 @@ svbool_t test_svwhilele_b64_s64(int64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilels.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2)
+svbool_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b8,_u64,,)(op1, op2);
 }
@@ -239,7 +247,7 @@ svbool_t test_svwhilele_b8_u64(uint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2)
+svbool_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b16,_u64,,)(op1, op2);
 }
@@ -256,7 +264,7 @@ svbool_t test_svwhilele_b16_u64(uint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2)
+svbool_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b32,_u64,,)(op1, op2);
 }
@@ -273,7 +281,7 @@ svbool_t test_svwhilele_b32_u64(uint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2)
+svbool_t test_svwhilele_b64_u64(uint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilele_b64,_u64,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilelt.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilelt.c
index 4994eb27e6307..69ce0739fa6eb 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilelt.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_whilelt.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svwhilelt_b8_s32(int32_t op1, int32_t op2)
+svbool_t test_svwhilelt_b8_s32(int32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b8,_s32,,)(op1, op2);
 }
@@ -41,7 +49,7 @@ svbool_t test_svwhilelt_b8_s32(int32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b16_s32(int32_t op1, int32_t op2)
+svbool_t test_svwhilelt_b16_s32(int32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b16,_s32,,)(op1, op2);
 }
@@ -58,7 +66,7 @@ svbool_t test_svwhilelt_b16_s32(int32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b32_s32(int32_t op1, int32_t op2)
+svbool_t test_svwhilelt_b32_s32(int32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b32,_s32,,)(op1, op2);
 }
@@ -75,7 +83,7 @@ svbool_t test_svwhilelt_b32_s32(int32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b64_s32(int32_t op1, int32_t op2)
+svbool_t test_svwhilelt_b64_s32(int32_t op1, int32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b64,_s32,,)(op1, op2);
 }
@@ -90,7 +98,7 @@ svbool_t test_svwhilelt_b64_s32(int32_t op1, int32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i32(i32 [[OP1:%.*]], i32 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svwhilelt_b8_u32(uint32_t op1, uint32_t op2)
+svbool_t test_svwhilelt_b8_u32(uint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b8,_u32,,)(op1, op2);
 }
@@ -107,7 +115,7 @@ svbool_t test_svwhilelt_b8_u32(uint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b16_u32(uint32_t op1, uint32_t op2)
+svbool_t test_svwhilelt_b16_u32(uint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b16,_u32,,)(op1, op2);
 }
@@ -124,7 +132,7 @@ svbool_t test_svwhilelt_b16_u32(uint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b32_u32(uint32_t op1, uint32_t op2)
+svbool_t test_svwhilelt_b32_u32(uint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b32,_u32,,)(op1, op2);
 }
@@ -141,7 +149,7 @@ svbool_t test_svwhilelt_b32_u32(uint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b64_u32(uint32_t op1, uint32_t op2)
+svbool_t test_svwhilelt_b64_u32(uint32_t op1, uint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b64,_u32,,)(op1, op2);
 }
@@ -156,7 +164,7 @@ svbool_t test_svwhilelt_b64_u32(uint32_t op1, uint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelt.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2)
+svbool_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b8,_s64,,)(op1, op2);
 }
@@ -173,7 +181,7 @@ svbool_t test_svwhilelt_b8_s64(int64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2)
+svbool_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b16,_s64,,)(op1, op2);
 }
@@ -190,7 +198,7 @@ svbool_t test_svwhilelt_b16_s64(int64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2)
+svbool_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b32,_s64,,)(op1, op2);
 }
@@ -207,7 +215,7 @@ svbool_t test_svwhilelt_b32_s64(int64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2)
+svbool_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b64,_s64,,)(op1, op2);
 }
@@ -222,7 +230,7 @@ svbool_t test_svwhilelt_b64_s64(int64_t op1, int64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.whilelo.nxv16i1.i64(i64 [[OP1:%.*]], i64 [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2)
+svbool_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b8,_u64,,)(op1, op2);
 }
@@ -239,7 +247,7 @@ svbool_t test_svwhilelt_b8_u64(uint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2)
+svbool_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b16,_u64,,)(op1, op2);
 }
@@ -256,7 +264,7 @@ svbool_t test_svwhilelt_b16_u64(uint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2)
+svbool_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b32,_u64,,)(op1, op2);
 }
@@ -273,7 +281,7 @@ svbool_t test_svwhilelt_b32_u64(uint64_t op1, uint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> [[TMP0]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
 //
-svbool_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2)
+svbool_t test_svwhilelt_b64_u64(uint64_t op1, uint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svwhilelt_b64,_u64,,)(op1, op2);
 }
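
A detail these while* tests pin down, visible in the CHECK lines: for the
b16/b32/b64 forms the underlying intrinsic produces a narrower predicate
(e.g. <vscale x 4 x i1> for b32) that is then widened back to the generic
svbool_t via convert.to.svbool, whereas the b8 forms return
<vscale x 16 x i1> directly. A small sketch (function name illustrative):

  #include <arm_sve.h>

  /* Lowers to llvm.aarch64.sve.whilelt.nxv4i1.i32 followed by
     llvm.aarch64.sve.convert.to.svbool.nxv4i1, per the checks above. */
  svbool_t first_n_words(int32_t i, int32_t n)
  {
    return svwhilelt_b32_s32(i, n);
  }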
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1-bfloat.c
index dd1533c508bbb..31d5e34e3cd84 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip1.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svzip1_bf16(svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svzip1_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_bf16,,)(op1, op2);
 }
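
The same conditional-attribute pattern can be reused by any source file
that must build both as non-streaming SVE code and as streaming SME
code; only the target features differ, mirroring the two new RUN lines
(-target-feature +sve versus -target-feature +sme, each with +bf16). A
sketch under those assumptions, with a hypothetical function name:

  #include <arm_sve.h>

  #if defined(__ARM_FEATURE_SME)
  #define MODE_ATTR __arm_streaming
  #else
  #define MODE_ATTR
  #endif

  // Valid with +sve,+bf16 as a normal function, or with +sme,+bf16 as a
  // streaming function.
  svbfloat16_t interleave_low(svbfloat16_t a, svbfloat16_t b) MODE_ATTR {
    return svzip1_bf16(a, b);
  }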
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
index 2d6c8d1ed6bf7..95a0f499248f1 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip1.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svzip1_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svzip1_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svzip1_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svzip1_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svzip1_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svzip1_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svzip1_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svzip1_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svzip1_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svzip1_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svzip1_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svzip1_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zip1.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svzip1_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svzip1_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svzip1_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zip1.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svzip1_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svzip1_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svzip1_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zip1.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svzip1_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svzip1_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svzip1_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zip1.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svzip1_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svzip1_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_u64,,)(op1, op2);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svzip1_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zip1.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svzip1_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svzip1_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_f16,,)(op1, op2);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svzip1_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zip1.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svzip1_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svzip1_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_f32,,)(op1, op2);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svzip1_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zip1.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svzip1_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svzip1_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip1,_f64,,)(op1, op2);
 }
@@ -189,7 +197,7 @@ svfloat64_t test_svzip1_f64(svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.nxv16i1(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svzip1_b8(svbool_t op1, svbool_t op2)
+svbool_t test_svzip1_b8(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svzip1_b8(op1, op2);
 }
@@ -204,7 +212,7 @@ svbool_t test_svzip1_b8(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svzip1_b16(svbool_t op1, svbool_t op2)
+svbool_t test_svzip1_b16(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svzip1_b16(op1, op2);
 }
@@ -219,7 +227,7 @@ svbool_t test_svzip1_b16(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svzip1_b32(svbool_t op1, svbool_t op2)
+svbool_t test_svzip1_b32(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svzip1_b32(op1, op2);
 }
@@ -234,7 +242,7 @@ svbool_t test_svzip1_b32(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip1.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svzip1_b64(svbool_t op1, svbool_t op2)
+svbool_t test_svzip1_b64(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svzip1_b64(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2-bfloat.c
index 275ab9073f817..d750fae041840 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2-bfloat.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2-bfloat.c
@@ -1,13 +1,21 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -target-feature +bf16 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-
-// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -25,7 +33,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.zip2.nxv8bf16(<vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP0]]
 //
-svbfloat16_t test_svzip2_bf16(svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svzip2_bf16(svbfloat16_t op1, svbfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_bf16,,)(op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
index 2a7418fb518f3..5b19cfd673f3d 100644
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_zip2.c
@@ -5,8 +5,16 @@
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s
 // RUN: %clang_cc1 -fclang-abi-compat=latest -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sve -S -disable-O0-optnone -Werror -o /dev/null %s
+// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64 -target-feature +sme -S -disable-O0-optnone -Werror -o /dev/null %s
+
 #include <arm_sve.h>
 
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
 #ifdef SVE_OVERLOADED_FORMS
 // A simple used,unused... macro, long enough to represent any SVE builtin.
 #define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
@@ -24,7 +32,7 @@
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svint8_t test_svzip2_s8(svint8_t op1, svint8_t op2)
+svint8_t test_svzip2_s8(svint8_t op1, svint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_s8,,)(op1, op2);
 }
@@ -39,7 +47,7 @@ svint8_t test_svzip2_s8(svint8_t op1, svint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svint16_t test_svzip2_s16(svint16_t op1, svint16_t op2)
+svint16_t test_svzip2_s16(svint16_t op1, svint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_s16,,)(op1, op2);
 }
@@ -54,7 +62,7 @@ svint16_t test_svzip2_s16(svint16_t op1, svint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svint32_t test_svzip2_s32(svint32_t op1, svint32_t op2)
+svint32_t test_svzip2_s32(svint32_t op1, svint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_s32,,)(op1, op2);
 }
@@ -69,7 +77,7 @@ svint32_t test_svzip2_s32(svint32_t op1, svint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svint64_t test_svzip2_s64(svint64_t op1, svint64_t op2)
+svint64_t test_svzip2_s64(svint64_t op1, svint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_s64,,)(op1, op2);
 }
@@ -84,7 +92,7 @@ svint64_t test_svzip2_s64(svint64_t op1, svint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.zip2.nxv16i8(<vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP0]]
 //
-svuint8_t test_svzip2_u8(svuint8_t op1, svuint8_t op2)
+svuint8_t test_svzip2_u8(svuint8_t op1, svuint8_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_u8,,)(op1, op2);
 }
@@ -99,7 +107,7 @@ svuint8_t test_svzip2_u8(svuint8_t op1, svuint8_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.zip2.nxv8i16(<vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP0]]
 //
-svuint16_t test_svzip2_u16(svuint16_t op1, svuint16_t op2)
+svuint16_t test_svzip2_u16(svuint16_t op1, svuint16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_u16,,)(op1, op2);
 }
@@ -114,7 +122,7 @@ svuint16_t test_svzip2_u16(svuint16_t op1, svuint16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.zip2.nxv4i32(<vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP0]]
 //
-svuint32_t test_svzip2_u32(svuint32_t op1, svuint32_t op2)
+svuint32_t test_svzip2_u32(svuint32_t op1, svuint32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_u32,,)(op1, op2);
 }
@@ -129,7 +137,7 @@ svuint32_t test_svzip2_u32(svuint32_t op1, svuint32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.zip2.nxv2i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
 //
-svuint64_t test_svzip2_u64(svuint64_t op1, svuint64_t op2)
+svuint64_t test_svzip2_u64(svuint64_t op1, svuint64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_u64,,)(op1, op2);
 }
@@ -144,7 +152,7 @@ svuint64_t test_svzip2_u64(svuint64_t op1, svuint64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.zip2.nxv8f16(<vscale x 8 x half> [[OP1:%.*]], <vscale x 8 x half> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x half> [[TMP0]]
 //
-svfloat16_t test_svzip2_f16(svfloat16_t op1, svfloat16_t op2)
+svfloat16_t test_svzip2_f16(svfloat16_t op1, svfloat16_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_f16,,)(op1, op2);
 }
@@ -159,7 +167,7 @@ svfloat16_t test_svzip2_f16(svfloat16_t op1, svfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.zip2.nxv4f32(<vscale x 4 x float> [[OP1:%.*]], <vscale x 4 x float> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-svfloat32_t test_svzip2_f32(svfloat32_t op1, svfloat32_t op2)
+svfloat32_t test_svzip2_f32(svfloat32_t op1, svfloat32_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_f32,,)(op1, op2);
 }
@@ -174,7 +182,7 @@ svfloat32_t test_svzip2_f32(svfloat32_t op1, svfloat32_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.zip2.nxv2f64(<vscale x 2 x double> [[OP1:%.*]], <vscale x 2 x double> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 2 x double> [[TMP0]]
 //
-svfloat64_t test_svzip2_f64(svfloat64_t op1, svfloat64_t op2)
+svfloat64_t test_svzip2_f64(svfloat64_t op1, svfloat64_t op2) MODE_ATTR
 {
   return SVE_ACLE_FUNC(svzip2,_f64,,)(op1, op2);
 }
@@ -189,7 +197,7 @@ svfloat64_t test_svzip2_f64(svfloat64_t op1, svfloat64_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.nxv16i1(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svzip2_b8(svbool_t op1, svbool_t op2)
+svbool_t test_svzip2_b8(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svzip2_b8(op1, op2);
 }
@@ -204,7 +212,7 @@ svbool_t test_svzip2_b8(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b16(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svzip2_b16(svbool_t op1, svbool_t op2)
+svbool_t test_svzip2_b16(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svzip2_b16(op1, op2);
 }
@@ -219,7 +227,7 @@ svbool_t test_svzip2_b16(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b32(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svzip2_b32(svbool_t op1, svbool_t op2)
+svbool_t test_svzip2_b32(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svzip2_b32(op1, op2);
 }
@@ -234,7 +242,7 @@ svbool_t test_svzip2_b32(svbool_t op1, svbool_t op2)
 // CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.zip2.b64(<vscale x 16 x i1> [[OP1:%.*]], <vscale x 16 x i1> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP0]]
 //
-svbool_t test_svzip2_b64(svbool_t op1, svbool_t op2)
+svbool_t test_svzip2_b64(svbool_t op1, svbool_t op2) MODE_ATTR
 {
   return svzip2_b64(op1, op2);
 }
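
In the Sema test below, the expected diagnostics change from "needs
target feature sve2" to "needs target feature sve2|sme": the guarded
intrinsics are now accepted with either feature, provided the calling
function is in the matching mode. A sketch of what the relaxed guard
permits (hypothetical function name), compiled with only +sme:

  #include <arm_sve.h>

  // No +sve2 required: svqrdmulh_s8 is guarded by "sve2|sme", so it is
  // legal here because the function executes in streaming mode.
  svint8_t doubling_mulh(svint8_t a, svint8_t b) __arm_streaming {
    return svqrdmulh_s8(a, b);
  }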
diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2.cpp b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2.cpp
index a12b57db56a40..8d32b5265b003 100644
--- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2.cpp
+++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2.cpp
@@ -40,1153 +40,1153 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svhistseg_s8' needs target feature sve2}}
   // overload-error@+1 {{'svhistseg' needs target feature sve2}}
   SVE_ACLE_FUNC(svhistseg,_s8,,)(svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqrdmulh_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmulh_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh,_s8,,)(svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqrdmulh_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmulh_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh,_n_s8,,)(svundef_s8(), i8);
-  // expected-error@+2 {{'svqdmulh_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmulh_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh,_s8,,)(svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqdmulh_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmulh_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh,_n_s8,,)(svundef_s8(), i8);
-  // expected-error@+2 {{'svsra_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svsra' needs target feature sve2}}
+  // expected-error@+2 {{'svsra_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsra' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsra,_n_s8,,)(svundef_s8(), svundef_s8(), 1);
-  // expected-error@+2 {{'svnbsl_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svnbsl_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svqabs_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s8,_z,)(pg, svundef_s8());
-  // expected-error@+2 {{'svqabs_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s8,_m,)(svundef_s8(), pg, svundef_s8());
-  // expected-error@+2 {{'svqabs_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s8,_x,)(pg, svundef_s8());
-  // expected-error@+2 {{'svcadd_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svcadd' needs target feature sve2}}
+  // expected-error@+2 {{'svcadd_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svcadd' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcadd,_s8,,)(svundef_s8(), svundef_s8(), 90);
-  // expected-error@+2 {{'svtbl2_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svtbl2' needs target feature sve2}}
+  // expected-error@+2 {{'svtbl2_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_s8,,)(svundef2_s8(), svundef_u8());
-  // expected-error@+2 {{'svhsubr_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhsubr_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhsubr_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhsubr_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svhsubr_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svhsubr_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'sveortb_s8' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'sveortb_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svbcax_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svbcax_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svqshlu_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshlu_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshlu_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshlu_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshlu,_n_s8,_z,)(pg, svundef_s8(), 1);
-  // expected-error@+2 {{'svqrshl_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqrshl_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqrshl_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqrshl_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqrshl_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqrshl_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svcmla_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svcmla' needs target feature sve2}}
+  // expected-error@+2 {{'svcmla_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svcmla' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcmla,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8(), 90);
-  // expected-error@+2 {{'svqsubr_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqsubr_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqsubr_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqsubr_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqsubr_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqsubr_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svrshr_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshr_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshr,_n_s8,_z,)(pg, svundef_s8(), 1);
-  // expected-error@+2 {{'svaddp_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svaddp_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqadd_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqadd_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqadd_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqadd_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqadd_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqadd_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svtbx_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svtbx' needs target feature sve2}}
+  // expected-error@+2 {{'svtbx_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_s8,,)(svundef_s8(), svundef_s8(), svundef_u8());
-  // expected-error@+2 {{'svqrdcmlah_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdcmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdcmlah_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdcmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdcmlah,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8(), 90);
-  // expected-error@+2 {{'svminp_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svminp_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqsub_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqsub_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqsub_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqsub_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqsub_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqsub_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svrsra_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svrsra' needs target feature sve2}}
+  // expected-error@+2 {{'svrsra_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsra' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsra,_n_s8,,)(svundef_s8(), svundef_s8(), 1);
-  // expected-error@+2 {{'sveor3_s8' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'sveor3_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svhadd_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhadd_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svhadd_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhadd_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svhadd_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhadd_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqrdmlsh_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlsh_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqrdmlsh_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlsh_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svmaxp_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svmaxp_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_s8,_x,)(pg, svundef_s8(), svundef_s8());
   // expected-error@+2 {{'svmatch_s8' needs target feature sve2}}
   // overload-error@+1 {{'svmatch' needs target feature sve2}}
   SVE_ACLE_FUNC(svmatch,_s8,,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svwhilerw_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilerw_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_s8,,)(const_i8_ptr, const_i8_ptr);
-  // expected-error@+2 {{'svqcadd_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqcadd' needs target feature sve2}}
+  // expected-error@+2 {{'svqcadd_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqcadd' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqcadd,_s8,,)(svundef_s8(), svundef_s8(), 90);
-  // expected-error@+2 {{'svrhadd_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svrhadd_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svrhadd_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svrhadd_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svrhadd_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svrhadd_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svwhilewr_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilewr_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_s8,,)(const_i8_ptr, const_i8_ptr);
-  // expected-error@+2 {{'svsli_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svsli' needs target feature sve2}}
+  // expected-error@+2 {{'svsli_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsli' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsli,_n_s8,,)(svundef_s8(), svundef_s8(), 1);
   // expected-error@+2 {{'svnmatch_s8' needs target feature sve2}}
   // overload-error@+1 {{'svnmatch' needs target feature sve2}}
   SVE_ACLE_FUNC(svnmatch,_s8,,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svaba_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svaba_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svuqadd_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s8,_m,)(pg, svundef_s8(), svundef_u8());
-  // expected-error@+2 {{'svuqadd_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s8,_m,)(pg, svundef_s8(), u8);
-  // expected-error@+2 {{'svuqadd_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s8,_z,)(pg, svundef_s8(), svundef_u8());
-  // expected-error@+2 {{'svuqadd_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s8,_z,)(pg, svundef_s8(), u8);
-  // expected-error@+2 {{'svuqadd_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s8,_x,)(pg, svundef_s8(), svundef_u8());
-  // expected-error@+2 {{'svuqadd_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s8,_x,)(pg, svundef_s8(), u8);
-  // expected-error@+2 {{'sveorbt_s8' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'sveorbt_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svbsl_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svbsl_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svhsub_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhsub_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhsub_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svhsub_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svhsub_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svhsub_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svqrdmlah_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqrdmlah_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svbsl2n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svbsl2n_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svsri_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svsri' needs target feature sve2}}
+  // expected-error@+2 {{'svsri_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsri' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsri,_n_s8,,)(svundef_s8(), svundef_s8(), 1);
-  // expected-error@+2 {{'svbsl1n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_s8,,)(svundef_s8(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svbsl1n_n_s8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_n_s8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_n_s8,,)(svundef_s8(), svundef_s8(), i8);
-  // expected-error@+2 {{'svrshl_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svrshl_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svrshl_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svrshl_n_s8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svrshl_n_s8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error@+2 {{'svrshl_n_s8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_n_s8_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s8,_x,)(pg, svundef_s8(), i8);
-  // expected-error at +2 {{'svqneg_s8_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqneg_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqneg_s8_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqneg_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s8,_z,)(pg, svundef_s8());
-  // expected-error at +2 {{'svqneg_s8_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqneg_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqneg_s8_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqneg_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s8,_m,)(svundef_s8(), pg, svundef_s8());
-  // expected-error at +2 {{'svqneg_s8_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqneg_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqneg_s8_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqneg_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s8,_x,)(pg, svundef_s8());
-  // expected-error at +2 {{'svxar_n_s8' needs target feature sve2}}
-  // overload-error at +1 {{'svxar' needs target feature sve2}}
+  // expected-error at +2 {{'svxar_n_s8' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svxar' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svxar,_n_s8,,)(svundef_s8(), svundef_s8(), 1);
-  // expected-error at +2 {{'svqshl_s8_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_s8_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s8,_z,)(pg, svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqshl_s8_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_s8_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s8,_m,)(pg, svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqshl_s8_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_s8_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s8,_x,)(pg, svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqshl_n_s8_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_n_s8_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s8,_z,)(pg, svundef_s8(), i8);
-  // expected-error at +2 {{'svqshl_n_s8_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_n_s8_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s8,_m,)(pg, svundef_s8(), i8);
-  // expected-error at +2 {{'svqshl_n_s8_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_n_s8_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s8,_x,)(pg, svundef_s8(), i8);
 
-  // expected-error at +2 {{'svmullb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmullb' needs target feature sve2}}
+  // expected-error at +2 {{'svmullb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svmullb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmullb' needs target feature sve2}}
+  // expected-error at +2 {{'svmullb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_n_s16,,)(svundef_s8(), i8);
-  // expected-error at +2 {{'svqrshrunb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshrunb' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshrunb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshrunb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshrunb,_n_s16,,)(svundef_s16(), 1);
-  // expected-error at +2 {{'svqdmlalbt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlalbt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlalbt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlalbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalbt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqdmlalbt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlalbt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlalbt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlalbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalbt,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error at +2 {{'svqrdmulh_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrdmulh' needs target feature sve2}}
+  // expected-error at +2 {{'svqrdmulh_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh,_s16,,)(svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqrdmulh_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrdmulh' needs target feature sve2}}
+  // expected-error at +2 {{'svqrdmulh_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh,_n_s16,,)(svundef_s16(), i16);
-  // expected-error at +2 {{'svqrdmulh_lane_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrdmulh_lane' needs target feature sve2}}
+  // expected-error at +2 {{'svqrdmulh_lane_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrdmulh_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh_lane,_s16,,)(svundef_s16(), svundef_s16(), 1);
-  // expected-error at +2 {{'svaddwb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svaddwb' needs target feature sve2}}
+  // expected-error at +2 {{'svaddwb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_s16,,)(svundef_s16(), svundef_s8());
-  // expected-error at +2 {{'svaddwb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svaddwb' needs target feature sve2}}
+  // expected-error at +2 {{'svaddwb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_n_s16,,)(svundef_s16(), i8);
-  // expected-error at +2 {{'svsubhnb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svsubhnb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_s16,,)(svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svsubhnb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svsubhnb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_n_s16,,)(svundef_s16(), i16);
-  // expected-error at +2 {{'svqdmulh_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmulh' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmulh_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh,_s16,,)(svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqdmulh_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmulh' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmulh_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh,_n_s16,,)(svundef_s16(), i16);
-  // expected-error at +2 {{'svqdmulh_lane_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmulh_lane' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmulh_lane_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmulh_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh_lane,_s16,,)(svundef_s16(), svundef_s16(), 1);
-  // expected-error at +2 {{'svqshrunt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqshrunt' needs target feature sve2}}
+  // expected-error at +2 {{'svqshrunt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshrunt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshrunt,_n_s16,,)(svundef_u8(), svundef_s16(), 1);
-  // expected-error at +2 {{'svrsubhnt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svrsubhnt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_s16,,)(svundef_s8(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svrsubhnt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svrsubhnt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_n_s16,,)(svundef_s8(), svundef_s16(), i16);
-  // expected-error at +2 {{'svnbsl_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svnbsl' needs target feature sve2}}
+  // expected-error at +2 {{'svnbsl_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svnbsl_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svnbsl' needs target feature sve2}}
+  // expected-error at +2 {{'svnbsl_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error at +2 {{'svqdmlslb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlslb' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlslb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqdmlslb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlslb' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlslb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslb,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error at +2 {{'svsubhnt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svsubhnt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_s16,,)(svundef_s8(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svsubhnt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svsubhnt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_n_s16,,)(svundef_s8(), svundef_s16(), i16);
-  // expected-error at +2 {{'svqabs_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqabs_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqabs_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqabs_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s16,_z,)(pg, svundef_s16());
-  // expected-error at +2 {{'svqabs_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqabs_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqabs_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqabs_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s16,_m,)(svundef_s16(), pg, svundef_s16());
-  // expected-error at +2 {{'svqabs_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqabs_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqabs_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqabs_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s16,_x,)(pg, svundef_s16());
-  // expected-error at +2 {{'svaddlbt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svaddlbt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddlbt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddlbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlbt,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svaddlbt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svaddlbt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddlbt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddlbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlbt,_n_s16,,)(svundef_s8(), i8);
-  // expected-error at +2 {{'svtbl2_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svtbl2' needs target feature sve2}}
+  // expected-error at +2 {{'svtbl2_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_s16,,)(svundef2_s16(), svundef_u16());
-  // expected-error at +2 {{'svshrnt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svshrnt' needs target feature sve2}}
+  // expected-error at +2 {{'svshrnt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svshrnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svshrnt,_n_s16,,)(svundef_s8(), svundef_s16(), 1);
-  // expected-error at +2 {{'svhsubr_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svhsubr_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svhsubr_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svhsubr_n_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_n_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svhsubr_n_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_n_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svhsubr_n_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_n_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'sveortb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'sveortb' needs target feature sve2}}
+  // expected-error at +2 {{'sveortb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'sveortb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'sveortb' needs target feature sve2}}
+  // expected-error at +2 {{'sveortb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error at +2 {{'svqxtnb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqxtnb' needs target feature sve2}}
+  // expected-error at +2 {{'svqxtnb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqxtnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnb,_s16,,)(svundef_s16());
-  // expected-error at +2 {{'svmlalt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmlalt' needs target feature sve2}}
+  // expected-error at +2 {{'svmlalt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svmlalt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmlalt' needs target feature sve2}}
+  // expected-error at +2 {{'svmlalt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error at +2 {{'svshrnb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svshrnb' needs target feature sve2}}
+  // expected-error at +2 {{'svshrnb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svshrnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svshrnb,_n_s16,,)(svundef_s16(), 1);
-  // expected-error at +2 {{'svaddhnt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_s16,,)(svundef_s8(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svaddhnt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_n_s16,,)(svundef_s8(), svundef_s16(), i16);
-  // expected-error at +2 {{'svmls_lane_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmls_lane' needs target feature sve2}}
+  // expected-error at +2 {{'svmls_lane_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmls_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmls_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), 1);
-  // expected-error at +2 {{'svqdmlalt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlalt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlalt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqdmlalt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlalt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlalt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalt,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error at +2 {{'svbcax_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svbcax' needs target feature sve2}}
+  // expected-error at +2 {{'svbcax_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svbcax_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svbcax' needs target feature sve2}}
+  // expected-error at +2 {{'svbcax_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error at +2 {{'svqxtnt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqxtnt' needs target feature sve2}}
+  // expected-error at +2 {{'svqxtnt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqxtnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnt,_s16,,)(svundef_s8(), svundef_s16());
-  // expected-error at +2 {{'svqdmlalb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlalb' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlalb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqdmlalb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlalb' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlalb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalb,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error at +2 {{'svqrshl_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshl_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqrshl_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshl_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqrshl_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshl_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqrshl_n_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshl_n_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqrshl_n_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshl_n_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqrshl_n_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshl_n_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svsublbt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsublbt' needs target feature sve2}}
+  // expected-error at +2 {{'svsublbt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsublbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublbt,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svsublbt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsublbt' needs target feature sve2}}
+  // expected-error at +2 {{'svsublbt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsublbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublbt,_n_s16,,)(svundef_s8(), i8);
-  // expected-error at +2 {{'svqshrnt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqshrnt' needs target feature sve2}}
+  // expected-error at +2 {{'svqshrnt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshrnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshrnt,_n_s16,,)(svundef_s8(), svundef_s16(), 1);
-  // expected-error at +2 {{'svqdmullt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmullt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmullt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullt,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqdmullt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmullt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmullt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullt,_n_s16,,)(svundef_s8(), i8);
-  // expected-error at +2 {{'svsublt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsublt' needs target feature sve2}}
+  // expected-error at +2 {{'svsublt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svsublt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsublt' needs target feature sve2}}
+  // expected-error at +2 {{'svsublt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_n_s16,,)(svundef_s8(), i8);
-  // expected-error at +2 {{'svqdmlslbt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlslbt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlslbt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlslbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslbt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqdmlslbt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlslbt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlslbt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlslbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslbt,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error at +2 {{'svadalp_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svadalp_z' needs target feature sve2}}
+  // expected-error at +2 {{'svadalp_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svadalp_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s16,_z,)(pg, svundef_s16(), svundef_s8());
-  // expected-error at +2 {{'svadalp_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svadalp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svadalp_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svadalp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s16,_m,)(pg, svundef_s16(), svundef_s8());
-  // expected-error at +2 {{'svadalp_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svadalp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svadalp_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svadalp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s16,_x,)(pg, svundef_s16(), svundef_s8());
-  // expected-error at +2 {{'svmul_lane_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmul_lane' needs target feature sve2}}
+  // expected-error at +2 {{'svmul_lane_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmul_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmul_lane,_s16,,)(svundef_s16(), svundef_s16(), 1);
-  // expected-error at +2 {{'svsubwt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsubwt' needs target feature sve2}}
+  // expected-error at +2 {{'svsubwt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_s16,,)(svundef_s16(), svundef_s8());
-  // expected-error at +2 {{'svsubwt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svsubwt' needs target feature sve2}}
+  // expected-error at +2 {{'svsubwt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_n_s16,,)(svundef_s16(), i8);
-  // expected-error at +2 {{'svqsubr_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqsubr_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqsubr_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqsubr_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqsubr_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqsubr_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqsubr_n_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqsubr_n_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqsubr_n_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqsubr_n_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqsubr_n_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqsubr_n_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqrshrnt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshrnt' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshrnt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshrnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshrnt,_n_s16,,)(svundef_s8(), svundef_s16(), 1);
-  // expected-error at +2 {{'svaddp_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svaddp_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svaddp_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svaddp_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqadd_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqadd_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqadd_n_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqadd_n_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqadd_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqadd_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqadd_n_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqadd_n_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqadd_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqadd_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqadd_n_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqadd_n_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svabdlb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svabdlb' needs target feature sve2}}
+  // expected-error at +2 {{'svabdlb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svabdlb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svabdlb' needs target feature sve2}}
+  // expected-error at +2 {{'svabdlb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_n_s16,,)(svundef_s8(), i8);
-  // expected-error at +2 {{'svtbx_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svtbx' needs target feature sve2}}
+  // expected-error at +2 {{'svtbx_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_s16,,)(svundef_s16(), svundef_s16(), svundef_u16());
-  // expected-error at +2 {{'svabdlt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svabdlt' needs target feature sve2}}
+  // expected-error at +2 {{'svabdlt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svabdlt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svabdlt' needs target feature sve2}}
+  // expected-error at +2 {{'svabdlt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_n_s16,,)(svundef_s8(), i8);
-  // expected-error at +2 {{'svqrshrnb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrshrnb' needs target feature sve2}}
+  // expected-error at +2 {{'svqrshrnb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshrnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshrnb,_n_s16,,)(svundef_s16(), 1);
-  // expected-error at +2 {{'svminp_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svminp_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqsub_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqsub_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqsub_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqsub_n_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_n_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqsub_n_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_n_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqsub_n_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_n_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svrsubhnb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svrsubhnb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_s16,,)(svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svrsubhnb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svrsubhnb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_n_s16,,)(svundef_s16(), i16);
-  // expected-error at +2 {{'svaddhnb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_s16,,)(svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svaddhnb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_n_s16,,)(svundef_s16(), i16);
-  // expected-error at +2 {{'svabalt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svabalt' needs target feature sve2}}
+  // expected-error at +2 {{'svabalt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svabalt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svabalt' needs target feature sve2}}
+  // expected-error at +2 {{'svabalt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error at +2 {{'svqshrnb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqshrnb' needs target feature sve2}}
+  // expected-error at +2 {{'svqshrnb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshrnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshrnb,_n_s16,,)(svundef_s16(), 1);
-  // expected-error at +2 {{'sveor3_s16' needs target feature sve2}}
-  // overload-error at +1 {{'sveor3' needs target feature sve2}}
+  // expected-error at +2 {{'sveor3_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'sveor3_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'sveor3' needs target feature sve2}}
+  // expected-error at +2 {{'sveor3_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error at +2 {{'svhadd_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svhadd_n_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_n_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svhadd_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svhadd_n_s16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_n_s16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svhadd_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svhadd_n_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_n_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error at +2 {{'svqshrunb_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqshrunb' needs target feature sve2}}
+  // expected-error at +2 {{'svqshrunb_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshrunb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshrunb,_n_s16,,)(svundef_s16(), 1);
-  // expected-error at +2 {{'svmovlb_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmovlb' needs target feature sve2}}
+  // expected-error at +2 {{'svmovlb_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmovlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlb,_s16,,)(svundef_s8());
-  // expected-error at +2 {{'svqrdmlsh_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrdmlsh' needs target feature sve2}}
+  // expected-error at +2 {{'svqrdmlsh_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrdmlsh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svqrdmlsh_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrdmlsh' needs target feature sve2}}
+  // expected-error at +2 {{'svqrdmlsh_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrdmlsh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error at +2 {{'svqrdmlsh_lane_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqrdmlsh_lane' needs target feature sve2}}
+  // expected-error at +2 {{'svqrdmlsh_lane_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrdmlsh_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), 1);
-  // expected-error at +2 {{'svqdmlslt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlslt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlslt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svqdmlslt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svqdmlslt' needs target feature sve2}}
+  // expected-error at +2 {{'svqdmlslt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqdmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslt,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error at +2 {{'svmaxp_s16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxp_s16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svmaxp_s16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxp_s16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svmullt_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmullt' needs target feature sve2}}
+  // expected-error at +2 {{'svmullt_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error at +2 {{'svmullt_n_s16' needs target feature sve2}}
-  // overload-error at +1 {{'svmullt' needs target feature sve2}}
+  // expected-error at +2 {{'svmullt_n_s16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_n_s16,,)(svundef_s8(), i8);
   // expected-error@+2 {{'svmatch_s16' needs target feature sve2}}
   // overload-error@+1 {{'svmatch' needs target feature sve2}}
   SVE_ACLE_FUNC(svmatch,_s16,,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqxtunb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtunb' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtunb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtunb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtunb,_s16,,)(svundef_s16());
-  // expected-error@+2 {{'svmla_lane_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svmla_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svmla_lane_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmla_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmla_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svrshrnb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svrshrnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrshrnb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshrnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshrnb,_n_s16,,)(svundef_s16(), 1);
-  // expected-error@+2 {{'svwhilerw_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilerw_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_s16,,)(const_i16_ptr, const_i16_ptr);
-  // expected-error@+2 {{'svshllb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svshllb' needs target feature sve2}}
+  // expected-error@+2 {{'svshllb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svshllb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svshllb,_n_s16,,)(svundef_s8(), 2);
-  // expected-error@+2 {{'svrhadd_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svrhadd_n_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svrhadd_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svrhadd_n_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svrhadd_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svrhadd_n_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svraddhnb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_s16,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svraddhnb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_n_s16,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svwhilewr_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilewr_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_s16,,)(const_i16_ptr, const_i16_ptr);
-  // expected-error@+2 {{'svmlalb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svmlalb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error@+2 {{'svsubwb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_s16,,)(svundef_s16(), svundef_s8());
-  // expected-error@+2 {{'svsubwb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_n_s16,,)(svundef_s16(), i8);
   // expected-error@+2 {{'svnmatch_s16' needs target feature sve2}}
   // overload-error@+1 {{'svnmatch' needs target feature sve2}}
   SVE_ACLE_FUNC(svnmatch,_s16,,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svaba_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svaba_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error@+2 {{'svraddhnt_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_s16,,)(svundef_s8(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svraddhnt_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_n_s16,,)(svundef_s8(), svundef_s16(), i16);
-  // expected-error@+2 {{'svuqadd_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s16,_m,)(pg, svundef_s16(), svundef_u16());
-  // expected-error@+2 {{'svuqadd_n_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s16,_m,)(pg, svundef_s16(), u16);
-  // expected-error@+2 {{'svuqadd_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s16,_z,)(pg, svundef_s16(), svundef_u16());
-  // expected-error@+2 {{'svuqadd_n_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s16,_z,)(pg, svundef_s16(), u16);
-  // expected-error@+2 {{'svuqadd_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s16,_x,)(pg, svundef_s16(), svundef_u16());
-  // expected-error@+2 {{'svuqadd_n_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s16,_x,)(pg, svundef_s16(), u16);
-  // expected-error@+2 {{'sveorbt_s16' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'sveorbt_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error@+2 {{'svbsl_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svbsl_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error@+2 {{'svshllt_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svshllt' needs target feature sve2}}
+  // expected-error@+2 {{'svshllt_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svshllt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svshllt,_n_s16,,)(svundef_s8(), 2);
-  // expected-error@+2 {{'svsubltb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubltb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubltb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubltb,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svsubltb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubltb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubltb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubltb,_n_s16,,)(svundef_s8(), i8);
-  // expected-error@+2 {{'svhsub_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svhsub_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svhsub_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svhsub_n_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svhsub_n_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svhsub_n_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svaddlb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svaddlb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_n_s16,,)(svundef_s8(), i8);
-  // expected-error@+2 {{'svqrdmlah_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqrdmlah_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error@+2 {{'svqrdmlah_lane_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_lane_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah_lane,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svqdmullb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullb,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svqdmullb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullb,_n_s16,,)(svundef_s8(), i8);
-  // expected-error@+2 {{'svbsl2n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svbsl2n_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error@+2 {{'svaddlt_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svaddlt_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_n_s16,,)(svundef_s8(), i8);
-  // expected-error@+2 {{'svqxtunt_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtunt' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtunt_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtunt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtunt,_s16,,)(svundef_u8(), svundef_s16());
-  // expected-error@+2 {{'svqrshrunt_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshrunt' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshrunt_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshrunt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshrunt,_n_s16,,)(svundef_u8(), svundef_s16(), 1);
-  // expected-error@+2 {{'svabalb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svabalb' needs target feature sve2}}
+  // expected-error@+2 {{'svabalb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svabalb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svabalb' needs target feature sve2}}
+  // expected-error@+2 {{'svabalb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error@+2 {{'svsublb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svsublb' needs target feature sve2}}
+  // expected-error@+2 {{'svsublb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_s16,,)(svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svsublb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svsublb' needs target feature sve2}}
+  // expected-error@+2 {{'svsublb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_n_s16,,)(svundef_s8(), i8);
-  // expected-error@+2 {{'svbsl1n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_s16,,)(svundef_s16(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svbsl1n_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_n_s16,,)(svundef_s16(), svundef_s16(), i16);
-  // expected-error@+2 {{'svrshl_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svrshl_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svrshl_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svrshl_n_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svrshl_n_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svrshl_n_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s16,_x,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svaddwt_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_s16,,)(svundef_s16(), svundef_s8());
-  // expected-error@+2 {{'svaddwt_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_n_s16,,)(svundef_s16(), i8);
-  // expected-error@+2 {{'svmlslb_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svmlslb_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error@+2 {{'svmlslt_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_s16,,)(svundef_s16(), svundef_s8(), svundef_s8());
-  // expected-error@+2 {{'svmlslt_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_n_s16,,)(svundef_s16(), svundef_s8(), i8);
-  // expected-error@+2 {{'svqneg_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s16,_z,)(pg, svundef_s16());
-  // expected-error@+2 {{'svqneg_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s16,_m,)(svundef_s16(), pg, svundef_s16());
-  // expected-error@+2 {{'svqneg_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s16,_x,)(pg, svundef_s16());
-  // expected-error@+2 {{'svmovlt_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlt' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlt_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlt,_s16,,)(svundef_s8());
-  // expected-error@+2 {{'svrshrnt_n_s16' needs target feature sve2}}
-  // overload-error@+1 {{'svrshrnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrshrnt_n_s16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshrnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshrnt,_n_s16,,)(svundef_s8(), svundef_s16(), 1);
-  // expected-error@+2 {{'svqshl_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s16,_z,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqshl_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s16,_m,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqshl_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s16,_x,)(pg, svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqshl_n_s16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s16,_z,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svqshl_n_s16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s16,_m,)(pg, svundef_s16(), i16);
-  // expected-error@+2 {{'svqshl_n_s16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s16,_x,)(pg, svundef_s16(), i16);
 
-  // expected-error@+2 {{'svmullb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svmullb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svmullb_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb_lane,_s32,,)(svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svqdmlalbt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalbt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalbt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalbt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqdmlalbt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalbt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalbt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalbt,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svqrdmulh_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmulh_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh,_s32,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqrdmulh_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmulh_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh,_n_s32,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svaddwb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_s32,,)(svundef_s32(), svundef_s16());
-  // expected-error@+2 {{'svaddwb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_n_s32,,)(svundef_s32(), i16);
-  // expected-error@+2 {{'svsubhnb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_s32,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svsubhnb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_n_s32,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svqdmulh_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmulh_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh,_s32,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmulh_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmulh_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh,_n_s32,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svrsubhnt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_s32,,)(svundef_s16(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svrsubhnt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_n_s32,,)(svundef_s16(), svundef_s32(), i32);
-  // expected-error@+2 {{'svnbsl_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svnbsl_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqdmlslb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqdmlslb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslb,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svqdmlslb_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslb_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslb_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslb_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslb_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svsubhnt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_s32,,)(svundef_s16(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svsubhnt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_n_s32,,)(svundef_s16(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqabs_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s32,_z,)(pg, svundef_s32());
-  // expected-error@+2 {{'svqabs_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s32,_m,)(svundef_s32(), pg, svundef_s32());
-  // expected-error@+2 {{'svqabs_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s32,_x,)(pg, svundef_s32());
-  // expected-error@+2 {{'svwhilegt_b8_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b8_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b8,_s32,,)(i32, i32);
-  // expected-error@+2 {{'svwhilegt_b16_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b16_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b16,_s32,,)(i32, i32);
-  // expected-error@+2 {{'svwhilegt_b32_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b32_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b32,_s32,,)(i32, i32);
-  // expected-error@+2 {{'svwhilegt_b64_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b64_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b64,_s32,,)(i32, i32);
-  // expected-error@+2 {{'svaddlbt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlbt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlbt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlbt,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svaddlbt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlbt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlbt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlbt,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svtbl2_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svtbl2' needs target feature sve2}}
+  // expected-error@+2 {{'svtbl2_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_s32,,)(svundef2_s32(), svundef_u32());
-  // expected-error@+2 {{'svhsubr_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svhsubr_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svhsubr_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svhsubr_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svhsubr_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svhsubr_n_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s32,_x,)(pg, svundef_s32(), i32);
   // expected-error@+2 {{'svhistcnt_s32_z' needs target feature sve2}}
   // overload-error@+1 {{'svhistcnt_z' needs target feature sve2}}
   SVE_ACLE_FUNC(svhistcnt,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'sveortb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'sveortb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqxtnb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtnb' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtnb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnb,_s32,,)(svundef_s32());
-  // expected-error@+2 {{'svmlalt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svmlalt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svmlalt_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalt_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalt_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalt_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svaddhnt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_s32,,)(svundef_s16(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svaddhnt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_n_s32,,)(svundef_s16(), svundef_s32(), i32);
   // expected-error@+2 {{'svldnt1uh_gather_u32base_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1uh_gather_s32' needs target feature sve2}}
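
(A minimal sketch, not part of the patch, of what the relaxed "sve2|sme" guard permits. The file name, function, and flags below are illustrative assumptions; the retagged intrinsics become usable from a streaming function compiled with only +sme, while intrinsics left as plain "sve2" above, such as svmatch, svhistcnt, and the gather loads, are still rejected:)

  // clang --target=aarch64-linux-gnu -march=armv9-a+sme -fsyntax-only example.c
  #include <arm_sve.h>

  // Streaming function: svqrdmulh_s16 is one of the intrinsics retagged
  // as "sve2|sme" in this patch, so only +sme is needed here.
  svint16_t scale(svint16_t a, svint16_t b) __arm_streaming {
    return svqrdmulh_s16(a, b); // saturating rounding doubling multiply high
  }
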
@@ -1200,233 +1200,233 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1uh_gather_u32base_index_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1uh_gather_index_s32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _index_s32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svqdmlalt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqdmlalt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalt,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svqdmlalt_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalt_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalt_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalt_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalt_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svbcax_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svbcax_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqxtnt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtnt' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtnt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnt,_s32,,)(svundef_s16(), svundef_s32());
-  // expected-error@+2 {{'svqdmlalb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqdmlalb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalb,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svqdmlalb_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalb_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalb_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalb_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalb_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svqrshl_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqrshl_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqrshl_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqrshl_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svqrshl_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svqrshl_n_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s32,_x,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svcdot_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svcdot' needs target feature sve2}}
+  // expected-error@+2 {{'svcdot_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svcdot' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcdot,_s32,,)(svundef_s32(), svundef_s8(), svundef_s8(), 90);
-  // expected-error@+2 {{'svsublbt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsublbt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublbt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublbt,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svsublbt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsublbt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublbt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublbt,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svqdmullt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullt,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqdmullt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullt,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svqdmullt_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullt_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullt_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullt_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullt_lane,_s32,,)(svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svsublt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svsublt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svqdmlslbt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslbt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslbt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslbt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqdmlslbt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslbt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslbt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslbt,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svadalp_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_z' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s32,_z,)(pg, svundef_s32(), svundef_s16());
-  // expected-error@+2 {{'svadalp_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s32,_m,)(pg, svundef_s32(), svundef_s16());
-  // expected-error@+2 {{'svadalp_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s32,_x,)(pg, svundef_s32(), svundef_s16());
-  // expected-error@+2 {{'svwhilege_b8_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b8' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b8_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b8' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b8,_s32,,)(i32, i32);
-  // expected-error@+2 {{'svwhilege_b16_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b16' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b16_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b16' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b16,_s32,,)(i32, i32);
-  // expected-error@+2 {{'svwhilege_b32_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b32' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b32_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b32' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b32,_s32,,)(i32, i32);
-  // expected-error@+2 {{'svwhilege_b64_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b64' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b64_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b64' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b64,_s32,,)(i32, i32);
-  // expected-error@+2 {{'svsubwt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_s32,,)(svundef_s32(), svundef_s16());
-  // expected-error@+2 {{'svsubwt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_n_s32,,)(svundef_s32(), i16);
-  // expected-error@+2 {{'svqsubr_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqsubr_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqsubr_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqsubr_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svqsubr_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svqsubr_n_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s32,_x,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svaddp_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svaddp_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqadd_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqadd_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svqadd_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqadd_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svqadd_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svqadd_n_s32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqadd_n_s32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s32,_x,)(pg, svundef_s32(), i32);
-  // expected-error at +2 {{'svabdlb_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svabdlb' needs target feature sve2}}
+  // expected-error at +2 {{'svabdlb_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svabdlb_n_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svabdlb' needs target feature sve2}}
+  // expected-error at +2 {{'svabdlb_n_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_n_s32,,)(svundef_s16(), i16);
-  // expected-error at +2 {{'svtbx_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svtbx' needs target feature sve2}}
+  // expected-error at +2 {{'svtbx_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_s32,,)(svundef_s32(), svundef_s32(), svundef_u32());
-  // expected-error at +2 {{'svabdlt_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svabdlt' needs target feature sve2}}
+  // expected-error at +2 {{'svabdlt_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svabdlt_n_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svabdlt' needs target feature sve2}}
+  // expected-error at +2 {{'svabdlt_n_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_n_s32,,)(svundef_s16(), i16);
-  // expected-error at +2 {{'svminp_s32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_s32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svminp_s32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_s32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svqsub_s32_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_s32_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svqsub_s32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_s32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svqsub_s32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_s32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svqsub_n_s32_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_n_s32_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error at +2 {{'svqsub_n_s32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_n_s32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error at +2 {{'svqsub_n_s32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqsub_n_s32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s32,_x,)(pg, svundef_s32(), i32);
-  // expected-error at +2 {{'svrsubhnb_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svrsubhnb_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_s32,,)(svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svrsubhnb_n_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svrsubhnb_n_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_n_s32,,)(svundef_s32(), i32);
-  // expected-error at +2 {{'svaddhnb_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnb_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_s32,,)(svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svaddhnb_n_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnb_n_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_n_s32,,)(svundef_s32(), i32);
-  // expected-error at +2 {{'svabalt_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svabalt' needs target feature sve2}}
+  // expected-error at +2 {{'svabalt_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error at +2 {{'svabalt_n_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svabalt' needs target feature sve2}}
+  // expected-error at +2 {{'svabalt_n_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error at +2 {{'sveor3_s32' needs target feature sve2}}
-  // overload-error at +1 {{'sveor3' needs target feature sve2}}
+  // expected-error at +2 {{'sveor3_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'sveor3_n_s32' needs target feature sve2}}
-  // overload-error at +1 {{'sveor3' needs target feature sve2}}
+  // expected-error at +2 {{'sveor3_n_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error at +2 {{'svhadd_s32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_s32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svhadd_n_s32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_n_s32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error at +2 {{'svhadd_s32_z' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_s32_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svhadd_n_s32_z' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_n_s32_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error at +2 {{'svhadd_s32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_s32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svhadd_n_s32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error at +2 {{'svhadd_n_s32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s32,_x,)(pg, svundef_s32(), i32);
-  // expected-error at +2 {{'svmovlb_s32' needs target feature sve2}}
-  // overload-error at +1 {{'svmovlb' needs target feature sve2}}
+  // expected-error at +2 {{'svmovlb_s32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmovlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlb,_s32,,)(svundef_s16());
   // expected-error at +2 {{'svstnt1_scatter_u32base_s32' needs target feature sve2}}
   // overload-error at +1 {{'svstnt1_scatter' needs target feature sve2}}
@@ -1440,35 +1440,35 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svstnt1_scatter_u32base_index_s32' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _s32)(pg, svundef_u32(), i64, svundef_s32());
-  // expected-error@+2 {{'svqrdmlsh_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlsh_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqrdmlsh_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlsh_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqdmlslt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqdmlslt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslt,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svqdmlslt_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslt_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslt_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslt_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslt_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svmaxp_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svmaxp_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svmullt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svmullt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svmullt_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt_lane,_s32,,)(svundef_s16(), svundef_s16(), 1);
   // expected-error@+2 {{'svldnt1sh_gather_u32base_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sh_gather_s32' needs target feature sve2}}
@@ -1482,47 +1482,47 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sh_gather_u32base_index_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sh_gather_index_s32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _index_s32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svqxtunb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtunb' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtunb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtunb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtunb,_s32,,)(svundef_s32());
-  // expected-error@+2 {{'svwhilerw_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilerw_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_s32,,)(const_i32_ptr, const_i32_ptr);
-  // expected-error@+2 {{'svrhadd_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svrhadd_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svrhadd_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svrhadd_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svrhadd_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svrhadd_n_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s32,_x,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svraddhnb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_s32,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svraddhnb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_n_s32,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svwhilewr_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilewr_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_s32,,)(const_i32_ptr, const_i32_ptr);
-  // expected-error@+2 {{'svmlalb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svmlalb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svmlalb_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1);
   // expected-error@+2 {{'svldnt1sb_gather_u32base_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sb_gather_s32' needs target feature sve2}}
@@ -1533,11 +1533,11 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sb_gather_u32base_offset_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sb_gather_offset_s32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _offset_s32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svsubwb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_s32,,)(svundef_s32(), svundef_s16());
-  // expected-error@+2 {{'svsubwb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_n_s32,,)(svundef_s32(), i16);
   // expected-error@+2 {{'svldnt1ub_gather_u32base_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1ub_gather_s32' needs target feature sve2}}
@@ -1548,71 +1548,71 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1ub_gather_u32base_offset_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1ub_gather_offset_s32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _offset_s32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svaba_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svaba_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svraddhnt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_s32,,)(svundef_s16(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svraddhnt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_n_s32,,)(svundef_s16(), svundef_s32(), i32);
-  // expected-error@+2 {{'svuqadd_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s32,_m,)(pg, svundef_s32(), svundef_u32());
-  // expected-error@+2 {{'svuqadd_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s32,_m,)(pg, svundef_s32(), u32);
-  // expected-error@+2 {{'svuqadd_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s32,_z,)(pg, svundef_s32(), svundef_u32());
-  // expected-error@+2 {{'svuqadd_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s32,_z,)(pg, svundef_s32(), u32);
-  // expected-error@+2 {{'svuqadd_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s32,_x,)(pg, svundef_s32(), svundef_u32());
-  // expected-error@+2 {{'svuqadd_n_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s32,_x,)(pg, svundef_s32(), u32);
-  // expected-error@+2 {{'sveorbt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'sveorbt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svbsl_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svbsl_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svsubltb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubltb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubltb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubltb,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svsubltb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubltb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubltb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubltb,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svhsub_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svhsub_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svhsub_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svhsub_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svhsub_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svhsub_n_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s32,_x,)(pg, svundef_s32(), i32);
   // expected-error@+2 {{'svldnt1_gather_u32base_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1_gather_s32' needs target feature sve2}}
@@ -1626,26 +1626,26 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1_gather_u32base_index_s32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1_gather_index_s32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_s32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svaddlb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svaddlb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svqrdmlah_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqrdmlah_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqdmullb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullb,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svqdmullb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullb,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svqdmullb_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullb_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullb_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullb_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullb_lane,_s32,,)(svundef_s16(), svundef_s16(), 1);
   // expected-error@+2 {{'svstnt1h_scatter_u32base_s32' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve2}}
@@ -1668,243 +1668,243 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svstnt1b_scatter_u32base_offset_s32' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, _offset, _s32)(pg, svundef_u32(), i64, svundef_s32());
-  // expected-error@+2 {{'svbsl2n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svbsl2n_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svaddlt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svaddlt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svqxtunt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtunt' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtunt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtunt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtunt,_s32,,)(svundef_u16(), svundef_s32());
-  // expected-error@+2 {{'svabalb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svabalb' needs target feature sve2}}
+  // expected-error@+2 {{'svabalb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svabalb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svabalb' needs target feature sve2}}
+  // expected-error@+2 {{'svabalb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svsublb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsublb' needs target feature sve2}}
+  // expected-error@+2 {{'svsublb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_s32,,)(svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svsublb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svsublb' needs target feature sve2}}
+  // expected-error@+2 {{'svsublb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_n_s32,,)(svundef_s16(), i16);
-  // expected-error@+2 {{'svbsl1n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_s32,,)(svundef_s32(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svbsl1n_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_n_s32,,)(svundef_s32(), svundef_s32(), i32);
-  // expected-error@+2 {{'svrshl_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svrshl_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svrshl_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svrshl_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svrshl_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svrshl_n_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s32,_x,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svaddwt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_s32,,)(svundef_s32(), svundef_s16());
-  // expected-error@+2 {{'svaddwt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_n_s32,,)(svundef_s32(), i16);
-  // expected-error@+2 {{'svmlslb_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svmlslb_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svmlslb_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svmlslt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16());
-  // expected-error@+2 {{'svmlslt_n_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_n_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_n_s32,,)(svundef_s32(), svundef_s16(), i16);
-  // expected-error@+2 {{'svmlslt_lane_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt_lane' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_lane_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt_lane' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt_lane,_s32,,)(svundef_s32(), svundef_s16(), svundef_s16(), 1);
-  // expected-error@+2 {{'svqneg_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s32,_z,)(pg, svundef_s32());
-  // expected-error@+2 {{'svqneg_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s32,_m,)(svundef_s32(), pg, svundef_s32());
-  // expected-error@+2 {{'svqneg_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s32,_x,)(pg, svundef_s32());
-  // expected-error@+2 {{'svmovlt_s32' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlt' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlt_s32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlt,_s32,,)(svundef_s16());
-  // expected-error@+2 {{'svqshl_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s32,_z,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqshl_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s32,_m,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqshl_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s32,_x,)(pg, svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqshl_n_s32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s32,_z,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svqshl_n_s32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s32,_m,)(pg, svundef_s32(), i32);
-  // expected-error@+2 {{'svqshl_n_s32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s32,_x,)(pg, svundef_s32(), i32);
 
-  // expected-error@+2 {{'svmullb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svmullb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svqdmlalbt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalbt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalbt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalbt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmlalbt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalbt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalbt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalbt,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqrdmulh_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmulh_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh,_s64,,)(svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqrdmulh_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmulh_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmulh,_n_s64,,)(svundef_s64(), i64);
-  // expected-error@+2 {{'svaddwb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_s64,,)(svundef_s64(), svundef_s32());
-  // expected-error@+2 {{'svaddwb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_n_s64,,)(svundef_s64(), i32);
-  // expected-error@+2 {{'svsubhnb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_s64,,)(svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svsubhnb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_n_s64,,)(svundef_s64(), i64);
-  // expected-error@+2 {{'svqdmulh_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmulh_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh,_s64,,)(svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqdmulh_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmulh' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmulh_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmulh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmulh,_n_s64,,)(svundef_s64(), i64);
-  // expected-error@+2 {{'svrsubhnt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_s64,,)(svundef_s32(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svrsubhnt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_n_s64,,)(svundef_s32(), svundef_s64(), i64);
-  // expected-error@+2 {{'svnbsl_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svnbsl_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svqdmlslb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmlslb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslb,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svsubhnt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_s64,,)(svundef_s32(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svsubhnt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_n_s64,,)(svundef_s32(), svundef_s64(), i64);
-  // expected-error@+2 {{'svqabs_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s64,_z,)(pg, svundef_s64());
-  // expected-error@+2 {{'svqabs_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s64,_m,)(svundef_s64(), pg, svundef_s64());
-  // expected-error@+2 {{'svqabs_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqabs_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqabs_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqabs_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqabs,_s64,_x,)(pg, svundef_s64());
-  // expected-error@+2 {{'svwhilegt_b8_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b8_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b8,_s64,,)(i64, i64);
-  // expected-error@+2 {{'svwhilegt_b16_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b16_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b16,_s64,,)(i64, i64);
-  // expected-error@+2 {{'svwhilegt_b32_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b32_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b32,_s64,,)(i64, i64);
-  // expected-error@+2 {{'svwhilegt_b64_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b64_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b64,_s64,,)(i64, i64);
-  // expected-error@+2 {{'svaddlbt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlbt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlbt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlbt,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svaddlbt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlbt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlbt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlbt,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svtbl2_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svtbl2' needs target feature sve2}}
+  // expected-error@+2 {{'svtbl2_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_s64,,)(svundef2_s64(), svundef_u64());
-  // expected-error@+2 {{'svhsubr_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error at +2 {{'svhsubr_s64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_s64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error at +2 {{'svhsubr_s64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_s64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error at +2 {{'svhsubr_n_s64_z' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_n_s64_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error at +2 {{'svhsubr_n_s64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_n_s64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error at +2 {{'svhsubr_n_s64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error at +2 {{'svhsubr_n_s64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_s64,_x,)(pg, svundef_s64(), i64);
   // expected-error at +2 {{'svhistcnt_s64_z' needs target feature sve2}}
   // overload-error at +1 {{'svhistcnt_z' needs target feature sve2}}
   SVE_ACLE_FUNC(svhistcnt,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error at +2 {{'sveortb_s64' needs target feature sve2}}
-  // overload-error at +1 {{'sveortb' needs target feature sve2}}
+  // expected-error at +2 {{'sveortb_s64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error at +2 {{'sveortb_n_s64' needs target feature sve2}}
-  // overload-error at +1 {{'sveortb' needs target feature sve2}}
+  // expected-error at +2 {{'sveortb_n_s64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error at +2 {{'svqxtnb_s64' needs target feature sve2}}
-  // overload-error at +1 {{'svqxtnb' needs target feature sve2}}
+  // expected-error at +2 {{'svqxtnb_s64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqxtnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnb,_s64,,)(svundef_s64());
-  // expected-error at +2 {{'svmlalt_s64' needs target feature sve2}}
-  // overload-error at +1 {{'svmlalt' needs target feature sve2}}
+  // expected-error at +2 {{'svmlalt_s64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error at +2 {{'svmlalt_n_s64' needs target feature sve2}}
-  // overload-error at +1 {{'svmlalt' needs target feature sve2}}
+  // expected-error at +2 {{'svmlalt_n_s64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error at +2 {{'svaddhnt_s64' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnt_s64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_s64,,)(svundef_s32(), svundef_s64(), svundef_s64());
-  // expected-error at +2 {{'svaddhnt_n_s64' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnt_n_s64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_n_s64,,)(svundef_s32(), svundef_s64(), i64);
   // expected-error at +2 {{'svldnt1uh_gather_u64base_s64' needs target feature sve2}}
   // overload-error at +1 {{'svldnt1uh_gather_s64' needs target feature sve2}}
@@ -1927,221 +1927,221 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1uh_gather_u64base_index_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1uh_gather_index_s64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svqdmlalt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmlalt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalt,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svbcax_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svbcax_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svqxtnt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtnt' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtnt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnt,_s64,,)(svundef_s32(), svundef_s64());
-  // expected-error@+2 {{'svqdmlalb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmlalb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlalb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlalb,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqrshl_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqrshl_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqrshl_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqrshl_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqrshl_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqrshl_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_s64,_x,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svsublbt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsublbt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublbt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublbt,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svsublbt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsublbt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublbt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublbt,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svqdmullt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullt,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmullt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullt,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svsublt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svsublt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svqdmlslbt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslbt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslbt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslbt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmlslbt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslbt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslbt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslbt,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svadalp_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_z' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s64,_z,)(pg, svundef_s64(), svundef_s32());
-  // expected-error@+2 {{'svadalp_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s64,_m,)(pg, svundef_s64(), svundef_s32());
-  // expected-error@+2 {{'svadalp_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_s64,_x,)(pg, svundef_s64(), svundef_s32());
-  // expected-error@+2 {{'svwhilege_b8_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b8' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b8_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b8' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b8,_s64,,)(i64, i64);
-  // expected-error@+2 {{'svwhilege_b16_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b16' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b16_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b16' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b16,_s64,,)(i64, i64);
-  // expected-error@+2 {{'svwhilege_b32_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b32' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b32_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b32' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b32,_s64,,)(i64, i64);
-  // expected-error@+2 {{'svwhilege_b64_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b64' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b64_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b64' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b64,_s64,,)(i64, i64);
-  // expected-error@+2 {{'svsubwt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_s64,,)(svundef_s64(), svundef_s32());
-  // expected-error@+2 {{'svsubwt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_n_s64,,)(svundef_s64(), i32);
-  // expected-error@+2 {{'svqsubr_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqsubr_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqsubr_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqsubr_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqsubr_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqsubr_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_s64,_x,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svaddp_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svaddp_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqadd_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqadd_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqadd_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqadd_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqadd_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqadd_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_s64,_x,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svabdlb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlb' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svabdlb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlb' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svtbx_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svtbx' needs target feature sve2}}
+  // expected-error@+2 {{'svtbx_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_s64,,)(svundef_s64(), svundef_s64(), svundef_u64());
-  // expected-error@+2 {{'svabdlt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlt' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svabdlt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlt' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svminp_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svminp_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqsub_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqsub_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqsub_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqsub_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqsub_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqsub_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_s64,_x,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svrsubhnb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_s64,,)(svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svrsubhnb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_n_s64,,)(svundef_s64(), i64);
-  // expected-error@+2 {{'svaddhnb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_s64,,)(svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svaddhnb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_n_s64,,)(svundef_s64(), i64);
-  // expected-error@+2 {{'svabalt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svabalt' needs target feature sve2}}
+  // expected-error@+2 {{'svabalt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svabalt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svabalt' needs target feature sve2}}
+  // expected-error@+2 {{'svabalt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'sveor3_s64' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'sveor3_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svhadd_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svhadd_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svhadd_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svhadd_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svhadd_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svhadd_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_s64,_x,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svmovlb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlb' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlb,_s64,,)(svundef_s32());
   // expected-error@+2 {{'svstnt1_scatter_u64base_s64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}}
@@ -2164,29 +2164,29 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svstnt1_scatter_u64base_index_s64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, _s64)(pg, svundef_u64(), i64, svundef_s64());
-  // expected-error@+2 {{'svqrdmlsh_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlsh_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqrdmlsh_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlsh_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlsh' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlsh,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svqdmlslt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmlslt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmlslt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmlslt,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svmaxp_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svmaxp_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svmullt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svmullt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_n_s64,,)(svundef_s32(), i32);
   // expected-error@+2 {{'svldnt1sh_gather_u64base_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sh_gather_s64' needs target feature sve2}}
@@ -2209,44 +2209,44 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sh_gather_u64base_index_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sh_gather_index_s64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svqxtunb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtunb' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtunb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtunb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtunb,_s64,,)(svundef_s64());
-  // expected-error@+2 {{'svwhilerw_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilerw_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_s64,,)(const_i64_ptr, const_i64_ptr);
-  // expected-error@+2 {{'svrhadd_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svrhadd_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svrhadd_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svrhadd_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svrhadd_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svrhadd_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_s64,_x,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svraddhnb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_s64,,)(svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svraddhnb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_n_s64,,)(svundef_s64(), i64);
-  // expected-error@+2 {{'svwhilewr_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilewr_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_s64,,)(const_i64_ptr, const_i64_ptr);
-  // expected-error@+2 {{'svmlalb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svmlalb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
   // expected-error@+2 {{'svldnt1sb_gather_u64base_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sb_gather_s64' needs target feature sve2}}
@@ -2260,11 +2260,11 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sb_gather_u64base_offset_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sb_gather_offset_s64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svsubwb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_s64,,)(svundef_s64(), svundef_s32());
-  // expected-error@+2 {{'svsubwb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_n_s64,,)(svundef_s64(), i32);
   // expected-error@+2 {{'svldnt1ub_gather_u64base_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1ub_gather_s64' needs target feature sve2}}
@@ -2278,41 +2278,41 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1ub_gather_u64base_offset_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1ub_gather_offset_s64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _offset_s64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svaba_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svaba_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svraddhnt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_s64,,)(svundef_s32(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svraddhnt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_n_s64,,)(svundef_s32(), svundef_s64(), i64);
-  // expected-error@+2 {{'svuqadd_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s64,_m,)(pg, svundef_s64(), svundef_u64());
-  // expected-error@+2 {{'svuqadd_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s64,_m,)(pg, svundef_s64(), u64);
-  // expected-error@+2 {{'svuqadd_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s64,_z,)(pg, svundef_s64(), svundef_u64());
-  // expected-error@+2 {{'svuqadd_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s64,_z,)(pg, svundef_s64(), u64);
-  // expected-error@+2 {{'svuqadd_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_s64,_x,)(pg, svundef_s64(), svundef_u64());
-  // expected-error@+2 {{'svuqadd_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svuqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svuqadd_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svuqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svuqadd,_n_s64,_x,)(pg, svundef_s64(), u64);
-  // expected-error@+2 {{'sveorbt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'sveorbt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
   // expected-error@+2 {{'svldnt1sw_gather_u64base_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sw_gather_s64' needs target feature sve2}}
@@ -2335,35 +2335,35 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sw_gather_u64base_index_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sw_gather_index_s64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svbsl_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svbsl_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svsubltb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubltb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubltb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubltb,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svsubltb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubltb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubltb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubltb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubltb,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svhsub_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svhsub_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svhsub_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svhsub_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svhsub_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svhsub_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_s64,_x,)(pg, svundef_s64(), i64);
   // expected-error@+2 {{'svldnt1_gather_u64base_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1_gather_s64' needs target feature sve2}}
@@ -2386,23 +2386,23 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1_gather_u64base_index_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1_gather_index_s64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_s64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svaddlb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svaddlb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svqrdmlah_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqrdmlah_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqrdmlah' needs target feature sve2}}
+  // expected-error@+2 {{'svqrdmlah_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrdmlah' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrdmlah,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svqdmullb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullb,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svqdmullb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqdmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svqdmullb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqdmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqdmullb,_n_s64,,)(svundef_s32(), i32);
   // expected-error@+2 {{'svldnt1uw_gather_u64base_s64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1uw_gather_s64' needs target feature sve2}}
@@ -2458,17 +2458,17 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svstnt1b_scatter_u64base_offset_s64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, _offset, _s64)(pg, svundef_u64(), i64, svundef_s64());
-  // expected-error@+2 {{'svbsl2n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svbsl2n_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svaddlt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svaddlt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_n_s64,,)(svundef_s32(), i32);
   // expected-error@+2 {{'svstnt1w_scatter_u64base_s64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1w_scatter' needs target feature sve2}}
@@ -2491,980 +2491,980 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svstnt1w_scatter_u64base_index_s64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1w_scatter_index' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _index, _s64)(pg, svundef_u64(), i64, svundef_s64());
-  // expected-error@+2 {{'svqxtunt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtunt' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtunt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtunt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtunt,_s64,,)(svundef_u32(), svundef_s64());
-  // expected-error@+2 {{'svabalb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svabalb' needs target feature sve2}}
+  // expected-error@+2 {{'svabalb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svabalb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svabalb' needs target feature sve2}}
+  // expected-error@+2 {{'svabalb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svsublb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsublb' needs target feature sve2}}
+  // expected-error@+2 {{'svsublb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_s64,,)(svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svsublb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svsublb' needs target feature sve2}}
+  // expected-error@+2 {{'svsublb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_n_s64,,)(svundef_s32(), i32);
-  // expected-error@+2 {{'svbsl1n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_s64,,)(svundef_s64(), svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svbsl1n_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_n_s64,,)(svundef_s64(), svundef_s64(), i64);
-  // expected-error@+2 {{'svrshl_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svrshl_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svrshl_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svrshl_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svrshl_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svrshl_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_s64,_x,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svaddwt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_s64,,)(svundef_s64(), svundef_s32());
-  // expected-error@+2 {{'svaddwt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_n_s64,,)(svundef_s64(), i32);
-  // expected-error@+2 {{'svmlslb_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svmlslb_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svmlslt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_s64,,)(svundef_s64(), svundef_s32(), svundef_s32());
-  // expected-error@+2 {{'svmlslt_n_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_n_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_n_s64,,)(svundef_s64(), svundef_s32(), i32);
-  // expected-error@+2 {{'svqneg_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s64,_z,)(pg, svundef_s64());
-  // expected-error@+2 {{'svqneg_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s64,_m,)(svundef_s64(), pg, svundef_s64());
-  // expected-error@+2 {{'svqneg_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqneg_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqneg_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqneg_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqneg,_s64,_x,)(pg, svundef_s64());
-  // expected-error@+2 {{'svmovlt_s64' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlt' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlt_s64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlt,_s64,,)(svundef_s32());
-  // expected-error@+2 {{'svqshl_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s64,_z,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqshl_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s64,_m,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqshl_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_s64,_x,)(pg, svundef_s64(), svundef_s64());
-  // expected-error@+2 {{'svqshl_n_s64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s64,_z,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqshl_n_s64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s64,_m,)(pg, svundef_s64(), i64);
-  // expected-error@+2 {{'svqshl_n_s64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_s64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_s64,_x,)(pg, svundef_s64(), i64);
 
   // expected-error@+2 {{'svhistseg_u8' needs target feature sve2}}
   // overload-error@+1 {{'svhistseg' needs target feature sve2}}
   SVE_ACLE_FUNC(svhistseg,_u8,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svpmullb_pair_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullb_pair' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullb_pair_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullb_pair' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullb_pair,_u8,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svpmullb_pair_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullb_pair' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullb_pair_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullb_pair' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullb_pair,_n_u8,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svnbsl_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svnbsl_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'svtbl2_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svtbl2' needs target feature sve2}}
+  // expected-error@+2 {{'svtbl2_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_u8,,)(svundef2_u8(), svundef_u8());
-  // expected-error@+2 {{'svhsubr_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u8,_z,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhsubr_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhsubr_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhsubr_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u8,_z,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svhsubr_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u8,_m,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svhsubr_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u8,_x,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svpmul_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svpmul' needs target feature sve2}}
+  // expected-error@+2 {{'svpmul_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmul' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmul,_u8,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svpmul_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svpmul' needs target feature sve2}}
+  // expected-error@+2 {{'svpmul_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmul' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmul,_n_u8,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'sveortb_u8' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'sveortb_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'svbcax_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svbcax_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'svqrshl_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u8,_z,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svqrshl_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u8,_m,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svqrshl_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u8,_x,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svqrshl_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u8,_z,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svqrshl_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u8,_m,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svqrshl_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u8,_x,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svpmullt_pair_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullt_pair' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullt_pair_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullt_pair' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullt_pair,_u8,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svpmullt_pair_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullt_pair' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullt_pair_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullt_pair' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullt_pair,_n_u8,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svqsubr_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u8,_z,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqsubr_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqsubr_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqsubr_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u8,_z,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svqsubr_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u8,_m,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svqsubr_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u8,_x,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svaddp_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svaddp_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqadd_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqadd_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u8,_m,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svqadd_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u8,_z,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqadd_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u8,_z,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svqadd_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqadd_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u8,_x,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svtbx_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svtbx' needs target feature sve2}}
+  // expected-error@+2 {{'svtbx_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svminp_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svminp_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svsqadd_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u8,_m,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svsqadd_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u8,_m,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svsqadd_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u8,_z,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svsqadd_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u8,_z,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svsqadd_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u8,_x,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svsqadd_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u8,_x,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svqsub_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u8,_z,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqsub_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqsub_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svqsub_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u8,_z,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svqsub_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u8,_m,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svqsub_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u8,_x,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'sveor3_u8' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'sveor3_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'svhadd_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhadd_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u8,_m,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svhadd_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u8,_z,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhadd_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u8,_z,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svhadd_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhadd_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u8,_x,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svmaxp_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svmaxp_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_u8,_x,)(pg, svundef_u8(), svundef_u8());
   // expected-error@+2 {{'svmatch_u8' needs target feature sve2}}
   // overload-error@+1 {{'svmatch' needs target feature sve2}}
   SVE_ACLE_FUNC(svmatch,_u8,,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svwhilerw_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilerw_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_u8,,)(const_u8_ptr, const_u8_ptr);
-  // expected-error@+2 {{'svrhadd_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svrhadd_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u8,_m,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svrhadd_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u8,_z,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svrhadd_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u8,_z,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svrhadd_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svrhadd_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u8,_x,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svwhilewr_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilewr_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_u8,,)(const_u8_ptr, const_u8_ptr);
   // expected-error@+2 {{'svnmatch_u8' needs target feature sve2}}
   // overload-error@+1 {{'svnmatch' needs target feature sve2}}
   SVE_ACLE_FUNC(svnmatch,_u8,,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svaba_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svaba_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'sveorbt_u8' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'sveorbt_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'svbsl_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svbsl_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'svhsub_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u8,_z,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhsub_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u8,_m,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhsub_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u8,_x,)(pg, svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svhsub_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u8,_z,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svhsub_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u8,_m,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svhsub_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u8,_x,)(pg, svundef_u8(), u8);
-  // expected-error@+2 {{'svbsl2n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svbsl2n_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'svbsl1n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_u8,,)(svundef_u8(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svbsl1n_n_u8' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_n_u8' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_n_u8,,)(svundef_u8(), svundef_u8(), u8);
-  // expected-error@+2 {{'svrshl_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u8,_z,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svrshl_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u8,_m,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svrshl_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u8,_x,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svrshl_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u8,_z,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svrshl_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u8,_m,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svrshl_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u8,_x,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svqshl_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u8,_z,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svqshl_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u8,_m,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svqshl_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u8,_x,)(pg, svundef_u8(), svundef_s8());
-  // expected-error@+2 {{'svqshl_n_u8_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u8_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u8,_z,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svqshl_n_u8_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u8_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u8,_m,)(pg, svundef_u8(), i8);
-  // expected-error@+2 {{'svqshl_n_u8_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u8_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u8,_x,)(pg, svundef_u8(), i8);
 
-  // expected-error@+2 {{'svmullb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svmullb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svpmullb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullb,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svpmullb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullb,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svaddwb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_u16,,)(svundef_u16(), svundef_u8());
-  // expected-error@+2 {{'svaddwb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_n_u16,,)(svundef_u16(), u8);
-  // expected-error@+2 {{'svsubhnb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_u16,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svsubhnb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_n_u16,,)(svundef_u16(), u16);
-  // expected-error@+2 {{'svrsubhnt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_u16,,)(svundef_u8(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svrsubhnt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_n_u16,,)(svundef_u8(), svundef_u16(), u16);
-  // expected-error@+2 {{'svnbsl_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svnbsl_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svsubhnt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_u16,,)(svundef_u8(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svsubhnt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_n_u16,,)(svundef_u8(), svundef_u16(), u16);
-  // expected-error@+2 {{'svtbl2_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svtbl2' needs target feature sve2}}
+  // expected-error@+2 {{'svtbl2_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_u16,,)(svundef2_u16(), svundef_u16());
-  // expected-error@+2 {{'svhsubr_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u16,_z,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhsubr_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhsubr_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhsubr_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u16,_z,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svhsubr_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u16,_m,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svhsubr_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u16,_x,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'sveortb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'sveortb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svqxtnb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtnb' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtnb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnb,_u16,,)(svundef_u16());
-  // expected-error@+2 {{'svmlalt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svmlalt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_n_u16,,)(svundef_u16(), svundef_u8(), u8);
-  // expected-error@+2 {{'svaddhnt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_u16,,)(svundef_u8(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svaddhnt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_n_u16,,)(svundef_u8(), svundef_u16(), u16);
-  // expected-error@+2 {{'svbcax_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svbcax_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svqxtnt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtnt' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtnt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnt,_u16,,)(svundef_u8(), svundef_u16());
-  // expected-error@+2 {{'svqrshl_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u16,_z,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svqrshl_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u16,_m,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svqrshl_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u16,_x,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svqrshl_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u16,_z,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svqrshl_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u16,_m,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svqrshl_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u16,_x,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svsublt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svsublt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svadalp_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_z' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u16,_z,)(pg, svundef_u16(), svundef_u8());
-  // expected-error@+2 {{'svadalp_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u16,_m,)(pg, svundef_u16(), svundef_u8());
-  // expected-error@+2 {{'svadalp_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u16,_x,)(pg, svundef_u16(), svundef_u8());
-  // expected-error@+2 {{'svpmullt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullt,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svpmullt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullt,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svsubwt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_u16,,)(svundef_u16(), svundef_u8());
-  // expected-error@+2 {{'svsubwt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_n_u16,,)(svundef_u16(), u8);
-  // expected-error@+2 {{'svqsubr_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u16,_z,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqsubr_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqsubr_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqsubr_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u16,_z,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svqsubr_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u16,_m,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svqsubr_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u16,_x,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svaddp_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svaddp_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqadd_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqadd_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u16,_m,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svqadd_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u16,_z,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqadd_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u16,_z,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svqadd_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqadd_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u16,_x,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svabdlb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlb' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svabdlb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlb' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svtbx_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svtbx' needs target feature sve2}}
+  // expected-error@+2 {{'svtbx_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svabdlt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlt' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svabdlt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlt' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svminp_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svminp_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svsqadd_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u16,_m,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svsqadd_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u16,_m,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svsqadd_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u16,_z,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svsqadd_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u16,_z,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svsqadd_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u16,_x,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svsqadd_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u16,_x,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svqsub_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u16,_z,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqsub_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqsub_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svqsub_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u16,_z,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svqsub_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u16,_m,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svqsub_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u16,_x,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svrsubhnb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_u16,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svrsubhnb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_n_u16,,)(svundef_u16(), u16);
-  // expected-error@+2 {{'svaddhnb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_u16,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svaddhnb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_n_u16,,)(svundef_u16(), u16);
-  // expected-error@+2 {{'svabalt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svabalt' needs target feature sve2}}
+  // expected-error@+2 {{'svabalt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svabalt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svabalt' needs target feature sve2}}
+  // expected-error@+2 {{'svabalt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_n_u16,,)(svundef_u16(), svundef_u8(), u8);
-  // expected-error@+2 {{'sveor3_u16' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'sveor3_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svhadd_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhadd_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u16,_m,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svhadd_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u16,_z,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhadd_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u16,_z,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svhadd_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhadd_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u16,_x,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svmovlb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlb' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlb,_u16,,)(svundef_u8());
-  // expected-error@+2 {{'svmaxp_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svmaxp_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svmullt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svmullt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_n_u16,,)(svundef_u8(), u8);
   // expected-error@+2 {{'svmatch_u16' needs target feature sve2}}
   // overload-error@+1 {{'svmatch' needs target feature sve2}}
   SVE_ACLE_FUNC(svmatch,_u16,,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svwhilerw_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilerw_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_u16,,)(const_u16_ptr, const_u16_ptr);
-  // expected-error@+2 {{'svrhadd_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svrhadd_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u16,_m,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svrhadd_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u16,_z,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svrhadd_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u16,_z,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svrhadd_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svrhadd_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u16,_x,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svraddhnb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_u16,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svraddhnb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_n_u16,,)(svundef_u16(), u16);
-  // expected-error@+2 {{'svwhilewr_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilewr_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_u16,,)(const_u16_ptr, const_u16_ptr);
-  // expected-error@+2 {{'svmlalb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svmlalb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_n_u16,,)(svundef_u16(), svundef_u8(), u8);
-  // expected-error@+2 {{'svsubwb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_u16,,)(svundef_u16(), svundef_u8());
-  // expected-error@+2 {{'svsubwb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_n_u16,,)(svundef_u16(), u8);
   // expected-error@+2 {{'svnmatch_u16' needs target feature sve2}}
   // overload-error@+1 {{'svnmatch' needs target feature sve2}}
   SVE_ACLE_FUNC(svnmatch,_u16,,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svaba_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svaba_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svraddhnt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_u16,,)(svundef_u8(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svraddhnt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_n_u16,,)(svundef_u8(), svundef_u16(), u16);
-  // expected-error@+2 {{'sveorbt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'sveorbt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svbsl_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svbsl_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svhsub_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u16,_z,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhsub_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u16,_m,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhsub_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u16,_x,)(pg, svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svhsub_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u16,_z,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svhsub_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u16,_m,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svhsub_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u16,_x,)(pg, svundef_u16(), u16);
-  // expected-error@+2 {{'svaddlb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svaddlb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svbsl2n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svbsl2n_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svaddlt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svaddlt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svabalb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svabalb' needs target feature sve2}}
+  // expected-error@+2 {{'svabalb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svabalb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svabalb' needs target feature sve2}}
+  // expected-error@+2 {{'svabalb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_n_u16,,)(svundef_u16(), svundef_u8(), u8);
-  // expected-error@+2 {{'svsublb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsublb' needs target feature sve2}}
+  // expected-error@+2 {{'svsublb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_u16,,)(svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svsublb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svsublb' needs target feature sve2}}
+  // expected-error@+2 {{'svsublb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_n_u16,,)(svundef_u8(), u8);
-  // expected-error@+2 {{'svbsl1n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_u16,,)(svundef_u16(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svbsl1n_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl1n_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_n_u16,,)(svundef_u16(), svundef_u16(), u16);
-  // expected-error@+2 {{'svrshl_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u16,_z,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svrshl_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u16,_m,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svrshl_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u16,_x,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svrshl_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u16,_z,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svrshl_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u16,_m,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svrshl_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrshl_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u16,_x,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svaddwt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_u16,,)(svundef_u16(), svundef_u8());
-  // expected-error@+2 {{'svaddwt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_n_u16,,)(svundef_u16(), u8);
-  // expected-error@+2 {{'svmlslb_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svmlslb_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_n_u16,,)(svundef_u16(), svundef_u8(), u8);
-  // expected-error@+2 {{'svmlslt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_u16,,)(svundef_u16(), svundef_u8(), svundef_u8());
-  // expected-error@+2 {{'svmlslt_n_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_n_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_n_u16,,)(svundef_u16(), svundef_u8(), u8);
-  // expected-error@+2 {{'svmovlt_u16' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlt' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlt_u16' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlt,_u16,,)(svundef_u8());
-  // expected-error@+2 {{'svqshl_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u16,_z,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svqshl_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u16,_m,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svqshl_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u16,_x,)(pg, svundef_u16(), svundef_s16());
-  // expected-error@+2 {{'svqshl_n_u16_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u16_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u16,_z,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svqshl_n_u16_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u16_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u16,_m,)(pg, svundef_u16(), i16);
-  // expected-error@+2 {{'svqshl_n_u16_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u16_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u16,_x,)(pg, svundef_u16(), i16);
 
-  // expected-error@+2 {{'svmullb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_u32,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svmullb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_n_u32,,)(svundef_u16(), u16);
-  // expected-error@+2 {{'svpmullb_pair_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullb_pair' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullb_pair_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullb_pair' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullb_pair,_u32,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svpmullb_pair_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullb_pair' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullb_pair_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullb_pair' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullb_pair,_n_u32,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svaddwb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_u32,,)(svundef_u32(), svundef_u16());
-  // expected-error@+2 {{'svaddwb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_n_u32,,)(svundef_u32(), u16);
-  // expected-error@+2 {{'svsubhnb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_u32,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svsubhnb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_n_u32,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svrsubhnt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_u32,,)(svundef_u16(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svrsubhnt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_n_u32,,)(svundef_u16(), svundef_u32(), u32);
-  // expected-error@+2 {{'svnbsl_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svnbsl_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svsubhnt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_u32,,)(svundef_u16(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svsubhnt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_n_u32,,)(svundef_u16(), svundef_u32(), u32);
-  // expected-error@+2 {{'svwhilegt_b8_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b8_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b8,_u32,,)(u32, u32);
-  // expected-error@+2 {{'svwhilegt_b16_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b16_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b16,_u32,,)(u32, u32);
-  // expected-error@+2 {{'svwhilegt_b32_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b32_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b32,_u32,,)(u32, u32);
-  // expected-error@+2 {{'svwhilegt_b64_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b64_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b64,_u32,,)(u32, u32);
-  // expected-error@+2 {{'svtbl2_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svtbl2' needs target feature sve2}}
+  // expected-error@+2 {{'svtbl2_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_u32,,)(svundef2_u32(), svundef_u32());
-  // expected-error@+2 {{'svhsubr_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u32,_z,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhsubr_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhsubr_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhsubr_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u32,_z,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svhsubr_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u32,_m,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svhsubr_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u32,_x,)(pg, svundef_u32(), u32);
   // expected-error at +2 {{'svhistcnt_u32_z' needs target feature sve2}}
   // overload-error at +1 {{'svhistcnt_z' needs target feature sve2}}
   SVE_ACLE_FUNC(svhistcnt,_u32,_z,)(pg, svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'sveortb_u32' needs target feature sve2}}
-  // overload-error at +1 {{'sveortb' needs target feature sve2}}
+  // expected-error at +2 {{'sveortb_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'sveortb_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'sveortb' needs target feature sve2}}
+  // expected-error at +2 {{'sveortb_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error at +2 {{'svqxtnb_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svqxtnb' needs target feature sve2}}
+  // expected-error at +2 {{'svqxtnb_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqxtnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnb,_u32,,)(svundef_u32());
-  // expected-error at +2 {{'svmlalt_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svmlalt' needs target feature sve2}}
+  // expected-error at +2 {{'svmlalt_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16());
-  // expected-error at +2 {{'svmlalt_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svmlalt' needs target feature sve2}}
+  // expected-error at +2 {{'svmlalt_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_n_u32,,)(svundef_u32(), svundef_u16(), u16);
-  // expected-error at +2 {{'svaddhnt_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnt_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_u32,,)(svundef_u16(), svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'svaddhnt_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddhnt_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_n_u32,,)(svundef_u16(), svundef_u32(), u32);
   // expected-error at +2 {{'svldnt1uh_gather_u32base_u32' needs target feature sve2}}
   // overload-error at +1 {{'svldnt1uh_gather_u32' needs target feature sve2}}
@@ -3478,230 +3478,230 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1uh_gather_u32base_index_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1uh_gather_index_u32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1uh_gather, _u32base, _index_u32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svbcax_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svbcax_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svqxtnt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtnt' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtnt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnt,_u32,,)(svundef_u16(), svundef_u32());
-  // expected-error@+2 {{'svqrshl_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u32,_z,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svqrshl_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u32,_m,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svqrshl_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u32,_x,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svqrshl_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u32,_z,)(pg, svundef_u32(), i32);
-  // expected-error@+2 {{'svqrshl_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u32,_m,)(pg, svundef_u32(), i32);
-  // expected-error@+2 {{'svqrshl_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u32,_x,)(pg, svundef_u32(), i32);
-  // expected-error@+2 {{'svsublt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_u32,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svsublt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_n_u32,,)(svundef_u16(), u16);
-  // expected-error@+2 {{'svadalp_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_z' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u32,_z,)(pg, svundef_u32(), svundef_u16());
-  // expected-error@+2 {{'svadalp_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u32,_m,)(pg, svundef_u32(), svundef_u16());
-  // expected-error@+2 {{'svadalp_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u32,_x,)(pg, svundef_u32(), svundef_u16());
-  // expected-error@+2 {{'svwhilege_b8_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b8' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b8_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b8' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b8,_u32,,)(u32, u32);
-  // expected-error@+2 {{'svwhilege_b16_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b16' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b16_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b16' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b16,_u32,,)(u32, u32);
-  // expected-error@+2 {{'svwhilege_b32_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b32' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b32_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b32' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b32,_u32,,)(u32, u32);
-  // expected-error@+2 {{'svwhilege_b64_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b64' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b64_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b64' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b64,_u32,,)(u32, u32);
-  // expected-error@+2 {{'svpmullt_pair_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullt_pair' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullt_pair_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullt_pair' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullt_pair,_u32,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svpmullt_pair_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullt_pair' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullt_pair_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullt_pair' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullt_pair,_n_u32,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svsubwt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_u32,,)(svundef_u32(), svundef_u16());
-  // expected-error@+2 {{'svsubwt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_n_u32,,)(svundef_u32(), u16);
-  // expected-error@+2 {{'svqsubr_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u32,_z,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqsubr_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqsubr_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqsubr_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u32,_z,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svqsubr_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u32,_m,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svqsubr_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u32,_x,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svadclt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svadclt' needs target feature sve2}}
+  // expected-error@+2 {{'svadclt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadclt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadclt,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svadclt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svadclt' needs target feature sve2}}
+  // expected-error@+2 {{'svadclt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadclt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadclt,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svaddp_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svaddp_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svrecpe_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrecpe_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrecpe_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrecpe_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrecpe,_u32,_z,)(pg, svundef_u32());
-  // expected-error@+2 {{'svrecpe_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrecpe_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrecpe_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrecpe_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrecpe,_u32,_m,)(svundef_u32(), pg, svundef_u32());
-  // expected-error@+2 {{'svrecpe_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrecpe_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrecpe_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrecpe_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrecpe,_u32,_x,)(pg, svundef_u32());
-  // expected-error@+2 {{'svqadd_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqadd_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u32,_m,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svqadd_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u32,_z,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqadd_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u32,_z,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svqadd_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqadd_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u32,_x,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svabdlb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlb' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_u32,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svabdlb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlb' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_n_u32,,)(svundef_u16(), u16);
-  // expected-error@+2 {{'svtbx_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svtbx' needs target feature sve2}}
+  // expected-error@+2 {{'svtbx_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svabdlt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlt' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_u32,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svabdlt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlt' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_n_u32,,)(svundef_u16(), u16);
-  // expected-error@+2 {{'svminp_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svminp_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svsqadd_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u32,_m,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svsqadd_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u32,_m,)(pg, svundef_u32(), i32);
-  // expected-error@+2 {{'svsqadd_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u32,_z,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svsqadd_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u32,_z,)(pg, svundef_u32(), i32);
-  // expected-error@+2 {{'svsqadd_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u32,_x,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svsqadd_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u32,_x,)(pg, svundef_u32(), i32);
-  // expected-error@+2 {{'svqsub_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u32,_z,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqsub_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqsub_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svqsub_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u32,_z,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svqsub_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u32,_m,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svqsub_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u32,_x,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svrsubhnb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_u32,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svrsubhnb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_n_u32,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svaddhnb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_u32,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svaddhnb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_n_u32,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svabalt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svabalt' needs target feature sve2}}
+  // expected-error@+2 {{'svabalt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svabalt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svabalt' needs target feature sve2}}
+  // expected-error@+2 {{'svabalt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_n_u32,,)(svundef_u32(), svundef_u16(), u16);
-  // expected-error@+2 {{'sveor3_u32' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'sveor3_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svhadd_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhadd_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u32,_m,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svhadd_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u32,_z,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhadd_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u32,_z,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svhadd_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhadd_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u32,_x,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svmovlb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlb' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlb,_u32,,)(svundef_u16());
   // expected-error@+2 {{'svstnt1_scatter_u32base_u32' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}}
@@ -3715,23 +3715,23 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svstnt1_scatter_u32base_index_u32' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _u32)(pg, svundef_u32(), i64, svundef_u32());
-  // expected-error@+2 {{'svmaxp_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svmaxp_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svsbclt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsbclt' needs target feature sve2}}
+  // expected-error@+2 {{'svsbclt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsbclt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsbclt,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svsbclt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsbclt' needs target feature sve2}}
+  // expected-error@+2 {{'svsbclt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsbclt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsbclt,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svmullt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_u32,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svmullt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_n_u32,,)(svundef_u16(), u16);
   // expected-error@+2 {{'svldnt1sh_gather_u32base_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sh_gather_u32' needs target feature sve2}}
@@ -3745,41 +3745,41 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sh_gather_u32base_index_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sh_gather_index_u32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sh_gather, _u32base, _index_u32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svwhilerw_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilerw_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_u32,,)(const_u32_ptr, const_u32_ptr);
-  // expected-error@+2 {{'svrhadd_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svrhadd_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u32,_m,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svrhadd_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u32,_z,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svrhadd_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u32,_z,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svrhadd_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svrhadd_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u32,_x,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svraddhnb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_u32,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svraddhnb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_n_u32,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svwhilewr_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilewr_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_u32,,)(const_u32_ptr, const_u32_ptr);
-  // expected-error@+2 {{'svmlalb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svmlalb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_n_u32,,)(svundef_u32(), svundef_u16(), u16);
   // expected-error@+2 {{'svldnt1sb_gather_u32base_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sb_gather_u32' needs target feature sve2}}
@@ -3790,11 +3790,11 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sb_gather_u32base_offset_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sb_gather_offset_u32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sb_gather, _u32base, _offset_u32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svsubwb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_u32,,)(svundef_u32(), svundef_u16());
-  // expected-error@+2 {{'svsubwb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_n_u32,,)(svundef_u32(), u16);
   // expected-error@+2 {{'svldnt1ub_gather_u32base_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1ub_gather_u32' needs target feature sve2}}
@@ -3805,53 +3805,53 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1ub_gather_u32base_offset_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1ub_gather_offset_u32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1ub_gather, _u32base, _offset_u32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svaba_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svaba_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svraddhnt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_u32,,)(svundef_u16(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svraddhnt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_n_u32,,)(svundef_u16(), svundef_u32(), u32);
-  // expected-error@+2 {{'sveorbt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'sveorbt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svbsl_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svbsl_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svadclb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svadclb' needs target feature sve2}}
+  // expected-error@+2 {{'svadclb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadclb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadclb,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svadclb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svadclb' needs target feature sve2}}
+  // expected-error@+2 {{'svadclb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadclb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadclb,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error@+2 {{'svhsub_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u32,_z,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhsub_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u32,_m,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhsub_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u32,_x,)(pg, svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svhsub_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u32,_z,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svhsub_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u32,_m,)(pg, svundef_u32(), u32);
-  // expected-error@+2 {{'svhsub_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u32,_x,)(pg, svundef_u32(), u32);
   // expected-error@+2 {{'svldnt1_gather_u32base_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1_gather_u32' needs target feature sve2}}
@@ -3865,11 +3865,11 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1_gather_u32base_index_u32' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1_gather_index_u32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_u32, )(pg, svundef_u32(), i64);
-  // expected-error@+2 {{'svaddlb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_u32,,)(svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svaddlb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_n_u32,,)(svundef_u16(), u16);
   // expected-error@+2 {{'svstnt1h_scatter_u32base_u32' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1h_scatter' needs target feature sve2}}
@@ -3892,207 +3892,207 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error at +2 {{'svstnt1b_scatter_u32base_offset_u32' needs target feature sve2}}
   // overload-error at +1 {{'svstnt1b_scatter_offset' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1b_scatter, _u32base, _offset, _u32)(pg, svundef_u32(), i64, svundef_u32());
-  // expected-error at +2 {{'svbsl2n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error at +2 {{'svbsl2n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'svbsl2n_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error at +2 {{'svbsl2n_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error at +2 {{'svaddlt_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svaddlt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddlt_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_u32,,)(svundef_u16(), svundef_u16());
-  // expected-error at +2 {{'svaddlt_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svaddlt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddlt_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_n_u32,,)(svundef_u16(), u16);
-  // expected-error at +2 {{'svabalb_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svabalb' needs target feature sve2}}
+  // expected-error at +2 {{'svabalb_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16());
-  // expected-error at +2 {{'svabalb_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svabalb' needs target feature sve2}}
+  // expected-error at +2 {{'svabalb_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_n_u32,,)(svundef_u32(), svundef_u16(), u16);
-  // expected-error at +2 {{'svsublb_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svsublb' needs target feature sve2}}
+  // expected-error at +2 {{'svsublb_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_u32,,)(svundef_u16(), svundef_u16());
-  // expected-error at +2 {{'svsublb_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svsublb' needs target feature sve2}}
+  // expected-error at +2 {{'svsublb_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_n_u32,,)(svundef_u16(), u16);
-  // expected-error at +2 {{'svsbclb_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svsbclb' needs target feature sve2}}
+  // expected-error at +2 {{'svsbclb_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsbclb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsbclb,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'svsbclb_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svsbclb' needs target feature sve2}}
+  // expected-error at +2 {{'svsbclb_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsbclb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsbclb,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error at +2 {{'svbsl1n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error at +2 {{'svbsl1n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_u32,,)(svundef_u32(), svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'svbsl1n_n_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error at +2 {{'svbsl1n_n_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_n_u32,,)(svundef_u32(), svundef_u32(), u32);
-  // expected-error at +2 {{'svrshl_u32_z' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_u32_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u32,_z,)(pg, svundef_u32(), svundef_s32());
-  // expected-error at +2 {{'svrshl_u32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_u32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u32,_m,)(pg, svundef_u32(), svundef_s32());
-  // expected-error at +2 {{'svrshl_u32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_u32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u32,_x,)(pg, svundef_u32(), svundef_s32());
-  // expected-error at +2 {{'svrshl_n_u32_z' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_n_u32_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u32,_z,)(pg, svundef_u32(), i32);
-  // expected-error at +2 {{'svrshl_n_u32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_n_u32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u32,_m,)(pg, svundef_u32(), i32);
-  // expected-error at +2 {{'svrshl_n_u32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_n_u32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u32,_x,)(pg, svundef_u32(), i32);
-  // expected-error at +2 {{'svrsqrte_u32_z' needs target feature sve2}}
-  // overload-error at +1 {{'svrsqrte_z' needs target feature sve2}}
+  // expected-error at +2 {{'svrsqrte_u32_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsqrte_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsqrte,_u32,_z,)(pg, svundef_u32());
-  // expected-error at +2 {{'svrsqrte_u32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svrsqrte_m' needs target feature sve2}}
+  // expected-error at +2 {{'svrsqrte_u32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsqrte_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsqrte,_u32,_m,)(svundef_u32(), pg, svundef_u32());
-  // expected-error at +2 {{'svrsqrte_u32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svrsqrte_x' needs target feature sve2}}
+  // expected-error at +2 {{'svrsqrte_u32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrsqrte_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsqrte,_u32,_x,)(pg, svundef_u32());
-  // expected-error at +2 {{'svaddwt_u32' needs target feature sve2}}
-  // overload-error at +1 {{'svaddwt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddwt_u32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_u32,,)(svundef_u32(), svundef_u16());
-  // expected-error@+2 {{'svaddwt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_n_u32,,)(svundef_u32(), u16);
-  // expected-error@+2 {{'svmlslb_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svmlslb_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslb_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_n_u32,,)(svundef_u32(), svundef_u16(), u16);
-  // expected-error@+2 {{'svmlslt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_u32,,)(svundef_u32(), svundef_u16(), svundef_u16());
-  // expected-error@+2 {{'svmlslt_n_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmlslt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlslt_n_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_n_u32,,)(svundef_u32(), svundef_u16(), u16);
-  // expected-error@+2 {{'svmovlt_u32' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlt' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlt_u32' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlt,_u32,,)(svundef_u16());
-  // expected-error@+2 {{'svqshl_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u32,_z,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svqshl_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u32,_m,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svqshl_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u32,_x,)(pg, svundef_u32(), svundef_s32());
-  // expected-error@+2 {{'svqshl_n_u32_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u32_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u32,_z,)(pg, svundef_u32(), i32);
-  // expected-error@+2 {{'svqshl_n_u32_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u32_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u32,_m,)(pg, svundef_u32(), i32);
-  // expected-error@+2 {{'svqshl_n_u32_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqshl_n_u32_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u32,_x,)(pg, svundef_u32(), i32);
 
-  // expected-error@+2 {{'svmullb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svmullb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svmullb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullb,_n_u64,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svpmullb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullb,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svpmullb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullb' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullb,_n_u64,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svaddwb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_u64,,)(svundef_u64(), svundef_u32());
-  // expected-error@+2 {{'svaddwb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddwb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddwb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwb,_n_u64,,)(svundef_u64(), u32);
-  // expected-error@+2 {{'svsubhnb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_u64,,)(svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svsubhnb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnb,_n_u64,,)(svundef_u64(), u64);
-  // expected-error@+2 {{'svrsubhnt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_u64,,)(svundef_u32(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svrsubhnt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnt,_n_u64,,)(svundef_u32(), svundef_u64(), u64);
-  // expected-error@+2 {{'svnbsl_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svnbsl_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svnbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svnbsl_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svnbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svnbsl,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svsubhnt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_u64,,)(svundef_u32(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svsubhnt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubhnt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubhnt,_n_u64,,)(svundef_u32(), svundef_u64(), u64);
-  // expected-error@+2 {{'svwhilegt_b8_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b8_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b8' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b8,_u64,,)(u64, u64);
-  // expected-error@+2 {{'svwhilegt_b16_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b16_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b16' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b16,_u64,,)(u64, u64);
-  // expected-error@+2 {{'svwhilegt_b32_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b32_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b32' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b32,_u64,,)(u64, u64);
-  // expected-error@+2 {{'svwhilegt_b64_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilegt_b64_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilegt_b64' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilegt_b64,_u64,,)(u64, u64);
-  // expected-error@+2 {{'svtbl2_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svtbl2' needs target feature sve2}}
+  // expected-error@+2 {{'svtbl2_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_u64,,)(svundef2_u64(), svundef_u64());
-  // expected-error@+2 {{'svhsubr_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u64,_z,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhsubr_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhsubr_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhsubr_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u64,_z,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svhsubr_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u64,_m,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svhsubr_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsubr_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsubr,_n_u64,_x,)(pg, svundef_u64(), u64);
   // expected-error@+2 {{'svhistcnt_u64_z' needs target feature sve2}}
   // overload-error@+1 {{'svhistcnt_z' needs target feature sve2}}
   SVE_ACLE_FUNC(svhistcnt,_u64,_z,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'sveortb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'sveortb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'sveortb' needs target feature sve2}}
+  // expected-error@+2 {{'sveortb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveortb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveortb,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svqxtnb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtnb' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtnb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnb,_u64,,)(svundef_u64());
-  // expected-error@+2 {{'svmlalt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svmlalt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalt' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalt,_n_u64,,)(svundef_u64(), svundef_u32(), u32);
-  // expected-error@+2 {{'svaddhnt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_u64,,)(svundef_u32(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svaddhnt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnt,_n_u64,,)(svundef_u32(), svundef_u64(), u64);
   // expected-error@+2 {{'svldnt1uh_gather_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1uh_gather_u64' needs target feature sve2}}
@@ -4115,221 +4115,221 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1uh_gather_u64base_index_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1uh_gather_index_u64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1uh_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svbcax_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svbcax_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svbcax' needs target feature sve2}}
+  // expected-error@+2 {{'svbcax_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbcax' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbcax,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svqxtnt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svqxtnt' needs target feature sve2}}
+  // expected-error@+2 {{'svqxtnt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqxtnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqxtnt,_u64,,)(svundef_u32(), svundef_u64());
-  // expected-error@+2 {{'svqrshl_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u64,_z,)(pg, svundef_u64(), svundef_s64());
-  // expected-error@+2 {{'svqrshl_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u64,_m,)(pg, svundef_u64(), svundef_s64());
-  // expected-error@+2 {{'svqrshl_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_u64,_x,)(pg, svundef_u64(), svundef_s64());
-  // expected-error@+2 {{'svqrshl_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u64,_z,)(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svqrshl_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u64,_m,)(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svqrshl_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqrshl_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqrshl_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqrshl,_n_u64,_x,)(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svsublt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svsublt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsublt' needs target feature sve2}}
+  // expected-error@+2 {{'svsublt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsublt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublt,_n_u64,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svadalp_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_z' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u64,_z,)(pg, svundef_u64(), svundef_u32());
-  // expected-error@+2 {{'svadalp_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u64,_m,)(pg, svundef_u64(), svundef_u32());
-  // expected-error@+2 {{'svadalp_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svadalp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svadalp_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadalp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadalp,_u64,_x,)(pg, svundef_u64(), svundef_u32());
-  // expected-error@+2 {{'svwhilege_b8_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b8' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b8_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b8' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b8,_u64,,)(u64, u64);
-  // expected-error@+2 {{'svwhilege_b16_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b16' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b16_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b16' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b16,_u64,,)(u64, u64);
-  // expected-error@+2 {{'svwhilege_b32_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b32' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b32_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b32' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b32,_u64,,)(u64, u64);
-  // expected-error@+2 {{'svwhilege_b64_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilege_b64' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilege_b64_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilege_b64' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilege_b64,_u64,,)(u64, u64);
-  // expected-error@+2 {{'svpmullt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullt,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svpmullt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svpmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svpmullt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svpmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svpmullt,_n_u64,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svsubwt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_u64,,)(svundef_u64(), svundef_u32());
-  // expected-error@+2 {{'svsubwt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwt' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwt,_n_u64,,)(svundef_u64(), u32);
-  // expected-error@+2 {{'svqsubr_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u64,_z,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqsubr_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqsubr_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqsubr_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u64,_z,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svqsubr_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u64,_m,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svqsubr_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsubr_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsubr_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsubr_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsubr,_n_u64,_x,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svadclt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svadclt' needs target feature sve2}}
+  // expected-error@+2 {{'svadclt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadclt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadclt,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svadclt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svadclt' needs target feature sve2}}
+  // expected-error@+2 {{'svadclt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadclt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadclt,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svaddp_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svaddp_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svaddp_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqadd_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqadd_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u64,_m,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svqadd_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u64,_z,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqadd_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u64,_z,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svqadd_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqadd_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqadd_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqadd,_n_u64,_x,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svabdlb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlb' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svabdlb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlb' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlb,_n_u64,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svtbx_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svtbx' needs target feature sve2}}
+  // expected-error@+2 {{'svtbx_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svabdlt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlt' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svabdlt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svabdlt' needs target feature sve2}}
+  // expected-error@+2 {{'svabdlt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabdlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabdlt,_n_u64,,)(svundef_u32(), u32);
-  // expected-error@+2 {{'svminp_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svminp_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svminp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svminp_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svsqadd_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u64,_m,)(pg, svundef_u64(), svundef_s64());
-  // expected-error@+2 {{'svsqadd_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u64,_m,)(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svsqadd_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u64,_z,)(pg, svundef_u64(), svundef_s64());
-  // expected-error@+2 {{'svsqadd_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u64,_z,)(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svsqadd_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_u64,_x,)(pg, svundef_u64(), svundef_s64());
-  // expected-error@+2 {{'svsqadd_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svsqadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svsqadd_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsqadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsqadd,_n_u64,_x,)(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svqsub_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u64,_z,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqsub_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqsub_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svqsub_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u64,_z,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svqsub_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u64,_m,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svqsub_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svqsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svqsub_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svqsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqsub,_n_u64,_x,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svrsubhnb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_u64,,)(svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svrsubhnb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svrsubhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svrsubhnb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrsubhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrsubhnb,_n_u64,,)(svundef_u64(), u64);
-  // expected-error@+2 {{'svaddhnb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_u64,,)(svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svaddhnb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddhnb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddhnb,_n_u64,,)(svundef_u64(), u64);
-  // expected-error@+2 {{'svabalt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svabalt' needs target feature sve2}}
+  // expected-error@+2 {{'svabalt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svabalt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svabalt' needs target feature sve2}}
+  // expected-error@+2 {{'svabalt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svabalt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalt,_n_u64,,)(svundef_u64(), svundef_u32(), u32);
-  // expected-error@+2 {{'sveor3_u64' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'sveor3_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'sveor3' needs target feature sve2}}
+  // expected-error@+2 {{'sveor3_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveor3' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveor3,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svhadd_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhadd_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u64,_m,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svhadd_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u64,_z,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhadd_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u64,_z,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svhadd_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhadd_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhadd_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhadd,_n_u64,_x,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svmovlb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmovlb' needs target feature sve2}}
+  // expected-error@+2 {{'svmovlb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmovlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlb,_u64,,)(svundef_u32());
   // expected-error@+2 {{'svstnt1_scatter_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1_scatter' needs target feature sve2}}
@@ -4352,23 +4352,23 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svstnt1_scatter_u64base_index_u64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1_scatter_index' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, _u64)(pg, svundef_u64(), i64, svundef_u64());
-  // expected-error@+2 {{'svmaxp_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svmaxp_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error@+2 {{'svmaxp_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svsbclt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsbclt' needs target feature sve2}}
+  // expected-error@+2 {{'svsbclt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsbclt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsbclt,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svsbclt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsbclt' needs target feature sve2}}
+  // expected-error@+2 {{'svsbclt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsbclt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsbclt,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svmullt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svmullt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmullt' needs target feature sve2}}
+  // expected-error@+2 {{'svmullt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmullt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmullt,_n_u64,,)(svundef_u32(), u32);
   // expected-error@+2 {{'svldnt1sh_gather_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sh_gather_u64' needs target feature sve2}}
@@ -4391,41 +4391,41 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sh_gather_u64base_index_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sh_gather_index_u64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sh_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svwhilerw_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilerw_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_u64,,)(const_u64_ptr, const_u64_ptr);
-  // expected-error@+2 {{'svrhadd_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svrhadd_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_m' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u64,_m,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svrhadd_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u64,_z,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svrhadd_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_z' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u64,_z,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svrhadd_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svrhadd_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svrhadd_x' needs target feature sve2}}
+  // expected-error@+2 {{'svrhadd_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svrhadd_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrhadd,_n_u64,_x,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svraddhnb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_u64,,)(svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svraddhnb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnb' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnb,_n_u64,,)(svundef_u64(), u64);
-  // expected-error@+2 {{'svwhilewr_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error@+2 {{'svwhilewr_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_u64,,)(const_u64_ptr, const_u64_ptr);
-  // expected-error@+2 {{'svmlalb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svmlalb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svmlalb' needs target feature sve2}}
+  // expected-error@+2 {{'svmlalb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svmlalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlalb,_n_u64,,)(svundef_u64(), svundef_u32(), u32);
   // expected-error@+2 {{'svldnt1sb_gather_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sb_gather_u64' needs target feature sve2}}
@@ -4439,11 +4439,11 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sb_gather_u64base_offset_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sb_gather_offset_u64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sb_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svsubwb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_u64,,)(svundef_u64(), svundef_u32());
-  // expected-error@+2 {{'svsubwb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svsubwb' needs target feature sve2}}
+  // expected-error@+2 {{'svsubwb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svsubwb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsubwb,_n_u64,,)(svundef_u64(), u32);
   // expected-error@+2 {{'svldnt1ub_gather_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1ub_gather_u64' needs target feature sve2}}
@@ -4457,23 +4457,23 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1ub_gather_u64base_offset_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1ub_gather_offset_u64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1ub_gather, _u64base, _offset_u64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svaba_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svaba_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaba' needs target feature sve2}}
+  // expected-error@+2 {{'svaba_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaba' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaba,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svraddhnt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_u64,,)(svundef_u32(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svraddhnt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svraddhnt' needs target feature sve2}}
+  // expected-error@+2 {{'svraddhnt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svraddhnt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svraddhnt,_n_u64,,)(svundef_u32(), svundef_u64(), u64);
-  // expected-error@+2 {{'sveorbt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'sveorbt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'sveorbt' needs target feature sve2}}
+  // expected-error@+2 {{'sveorbt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'sveorbt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(sveorbt,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
   // expected-error@+2 {{'svldnt1sw_gather_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sw_gather_u64' needs target feature sve2}}
@@ -4496,35 +4496,35 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1sw_gather_u64base_index_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1sw_gather_index_u64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1sw_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svbsl_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svbsl_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svadclb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svadclb' needs target feature sve2}}
+  // expected-error@+2 {{'svadclb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadclb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadclb,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svadclb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svadclb' needs target feature sve2}}
+  // expected-error@+2 {{'svadclb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svadclb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svadclb,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svhsub_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u64,_z,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhsub_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u64,_m,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhsub_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_u64,_x,)(pg, svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svhsub_n_u64_z' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_z' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u64_z' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u64,_z,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svhsub_n_u64_m' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_m' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u64_m' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u64,_m,)(pg, svundef_u64(), u64);
-  // expected-error@+2 {{'svhsub_n_u64_x' needs target feature sve2}}
-  // overload-error@+1 {{'svhsub_x' needs target feature sve2}}
+  // expected-error@+2 {{'svhsub_n_u64_x' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svhsub_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svhsub,_n_u64,_x,)(pg, svundef_u64(), u64);
   // expected-error@+2 {{'svldnt1_gather_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1_gather_u64' needs target feature sve2}}
@@ -4547,11 +4547,11 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svldnt1_gather_u64base_index_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1_gather_index_u64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_u64, )(pg, svundef_u64(), i64);
-  // expected-error@+2 {{'svaddlb_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svaddlb_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlb' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlb_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlb,_n_u64,,)(svundef_u32(), u32);
   // expected-error@+2 {{'svldnt1uw_gather_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svldnt1uw_gather_u64' needs target feature sve2}}
@@ -4607,17 +4607,17 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error@+2 {{'svstnt1b_scatter_u64base_offset_u64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1b_scatter_offset' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1b_scatter, _u64base, _offset, _u64)(pg, svundef_u64(), i64, svundef_u64());
-  // expected-error@+2 {{'svbsl2n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error@+2 {{'svbsl2n_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svbsl2n' needs target feature sve2}}
+  // expected-error@+2 {{'svbsl2n_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svbsl2n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl2n,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error@+2 {{'svaddlt_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error@+2 {{'svaddlt_n_u64' needs target feature sve2}}
-  // overload-error@+1 {{'svaddlt' needs target feature sve2}}
+  // expected-error@+2 {{'svaddlt_n_u64' needs target feature sve2|sme}}
+  // overload-error@+1 {{'svaddlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddlt,_n_u64,,)(svundef_u32(), u32);
   // expected-error@+2 {{'svstnt1w_scatter_u64base_u64' needs target feature sve2}}
   // overload-error@+1 {{'svstnt1w_scatter' needs target feature sve2}}
@@ -4640,178 +4640,178 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error at +2 {{'svstnt1w_scatter_u64base_index_u64' needs target feature sve2}}
   // overload-error at +1 {{'svstnt1w_scatter_index' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1w_scatter, _u64base, _index, _u64)(pg, svundef_u64(), i64, svundef_u64());
-  // expected-error at +2 {{'svabalb_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svabalb' needs target feature sve2}}
+  // expected-error at +2 {{'svabalb_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'svabalb_n_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svabalb' needs target feature sve2}}
+  // expected-error at +2 {{'svabalb_n_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svabalb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svabalb,_n_u64,,)(svundef_u64(), svundef_u32(), u32);
-  // expected-error at +2 {{'svsublb_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svsublb' needs target feature sve2}}
+  // expected-error at +2 {{'svsublb_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_u64,,)(svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'svsublb_n_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svsublb' needs target feature sve2}}
+  // expected-error at +2 {{'svsublb_n_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsublb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsublb,_n_u64,,)(svundef_u32(), u32);
-  // expected-error at +2 {{'svsbclb_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svsbclb' needs target feature sve2}}
+  // expected-error at +2 {{'svsbclb_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsbclb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsbclb,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error at +2 {{'svsbclb_n_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svsbclb' needs target feature sve2}}
+  // expected-error at +2 {{'svsbclb_n_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svsbclb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svsbclb,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error at +2 {{'svbsl1n_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error at +2 {{'svbsl1n_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_u64,,)(svundef_u64(), svundef_u64(), svundef_u64());
-  // expected-error at +2 {{'svbsl1n_n_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svbsl1n' needs target feature sve2}}
+  // expected-error at +2 {{'svbsl1n_n_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svbsl1n' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svbsl1n,_n_u64,,)(svundef_u64(), svundef_u64(), u64);
-  // expected-error at +2 {{'svrshl_u64_z' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_u64_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u64,_z,)(pg, svundef_u64(), svundef_s64());
-  // expected-error at +2 {{'svrshl_u64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_u64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u64,_m,)(pg, svundef_u64(), svundef_s64());
-  // expected-error at +2 {{'svrshl_u64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_u64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_u64,_x,)(pg, svundef_u64(), svundef_s64());
-  // expected-error at +2 {{'svrshl_n_u64_z' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_n_u64_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u64,_z,)(pg, svundef_u64(), i64);
-  // expected-error at +2 {{'svrshl_n_u64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_n_u64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u64,_m,)(pg, svundef_u64(), i64);
-  // expected-error at +2 {{'svrshl_n_u64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svrshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svrshl_n_u64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svrshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svrshl,_n_u64,_x,)(pg, svundef_u64(), i64);
-  // expected-error at +2 {{'svaddwt_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svaddwt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddwt_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_u64,,)(svundef_u64(), svundef_u32());
-  // expected-error at +2 {{'svaddwt_n_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svaddwt' needs target feature sve2}}
+  // expected-error at +2 {{'svaddwt_n_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddwt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddwt,_n_u64,,)(svundef_u64(), u32);
-  // expected-error at +2 {{'svmlslb_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svmlslb' needs target feature sve2}}
+  // expected-error at +2 {{'svmlslb_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'svmlslb_n_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svmlslb' needs target feature sve2}}
+  // expected-error at +2 {{'svmlslb_n_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlslb' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslb,_n_u64,,)(svundef_u64(), svundef_u32(), u32);
-  // expected-error at +2 {{'svmlslt_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svmlslt' needs target feature sve2}}
+  // expected-error at +2 {{'svmlslt_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_u64,,)(svundef_u64(), svundef_u32(), svundef_u32());
-  // expected-error at +2 {{'svmlslt_n_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svmlslt' needs target feature sve2}}
+  // expected-error at +2 {{'svmlslt_n_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmlslt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmlslt,_n_u64,,)(svundef_u64(), svundef_u32(), u32);
-  // expected-error at +2 {{'svmovlt_u64' needs target feature sve2}}
-  // overload-error at +1 {{'svmovlt' needs target feature sve2}}
+  // expected-error at +2 {{'svmovlt_u64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmovlt' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmovlt,_u64,,)(svundef_u32());
-  // expected-error at +2 {{'svqshl_u64_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_u64_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u64,_z,)(pg, svundef_u64(), svundef_s64());
-  // expected-error at +2 {{'svqshl_u64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_u64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u64,_m,)(pg, svundef_u64(), svundef_s64());
-  // expected-error at +2 {{'svqshl_u64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_u64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_u64,_x,)(pg, svundef_u64(), svundef_s64());
-  // expected-error at +2 {{'svqshl_n_u64_z' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_z' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_n_u64_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u64,_z,)(pg, svundef_u64(), i64);
-  // expected-error at +2 {{'svqshl_n_u64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_m' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_n_u64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u64,_m,)(pg, svundef_u64(), i64);
-  // expected-error at +2 {{'svqshl_n_u64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svqshl_x' needs target feature sve2}}
+  // expected-error at +2 {{'svqshl_n_u64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svqshl_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svqshl,_n_u64,_x,)(pg, svundef_u64(), i64);
 
-  // expected-error at +2 {{'svlogb_f16_z' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_z' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f16_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f16,_z,)(pg, svundef_f16());
-  // expected-error at +2 {{'svlogb_f16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_m' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f16,_m,)(svundef_s16(), pg, svundef_f16());
-  // expected-error at +2 {{'svlogb_f16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_x' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f16,_x,)(pg, svundef_f16());
-  // expected-error at +2 {{'svminnmp_f16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svminnmp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svminnmp_f16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminnmp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminnmp,_f16,_m,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svminnmp_f16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svminnmp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svminnmp_f16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminnmp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminnmp,_f16,_x,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svtbl2_f16' needs target feature sve2}}
-  // overload-error at +1 {{'svtbl2' needs target feature sve2}}
+  // expected-error at +2 {{'svtbl2_f16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_f16,,)(svundef2_f16(), svundef_u16());
-  // expected-error at +2 {{'svaddp_f16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svaddp_f16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_f16,_m,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svaddp_f16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svaddp_f16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_f16,_x,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svtbx_f16' needs target feature sve2}}
-  // overload-error at +1 {{'svtbx' needs target feature sve2}}
+  // expected-error at +2 {{'svtbx_f16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_f16,,)(svundef_f16(), svundef_f16(), svundef_u16());
-  // expected-error at +2 {{'svminp_f16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_f16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_f16,_m,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svminp_f16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_f16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_f16,_x,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svmaxp_f16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxp_f16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_f16,_m,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svmaxp_f16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxp_f16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_f16,_x,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svmaxnmp_f16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxnmp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxnmp_f16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxnmp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxnmp,_f16,_m,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svmaxnmp_f16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxnmp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxnmp_f16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxnmp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxnmp,_f16,_x,)(pg, svundef_f16(), svundef_f16());
-  // expected-error at +2 {{'svwhilerw_f16' needs target feature sve2}}
-  // overload-error at +1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error at +2 {{'svwhilerw_f16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_f16,,)(const_f16_ptr, const_f16_ptr);
-  // expected-error at +2 {{'svwhilewr_f16' needs target feature sve2}}
-  // overload-error at +1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error at +2 {{'svwhilewr_f16' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_f16,,)(const_f16_ptr, const_f16_ptr);
-  // expected-error at +2 {{'svcvtlt_f32_f16_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtlt_f32_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtlt_f32_f16_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtlt_f32_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtlt_f32,_f16,_m,)(svundef_f32(), pg, svundef_f16());
-  // expected-error at +2 {{'svcvtlt_f32_f16_x' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtlt_f32_x' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtlt_f32_f16_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtlt_f32_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtlt_f32,_f16,_x,)(pg, svundef_f16());
 
-  // expected-error at +2 {{'svlogb_f32_z' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_z' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f32_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f32,_z,)(pg, svundef_f32());
-  // expected-error at +2 {{'svlogb_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_m' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f32,_m,)(svundef_s32(), pg, svundef_f32());
-  // expected-error at +2 {{'svlogb_f32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_x' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f32,_x,)(pg, svundef_f32());
-  // expected-error at +2 {{'svminnmp_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svminnmp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svminnmp_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminnmp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminnmp,_f32,_m,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svminnmp_f32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svminnmp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svminnmp_f32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminnmp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminnmp,_f32,_x,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svtbl2_f32' needs target feature sve2}}
-  // overload-error at +1 {{'svtbl2' needs target feature sve2}}
+  // expected-error at +2 {{'svtbl2_f32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_f32,,)(svundef2_f32(), svundef_u32());
-  // expected-error at +2 {{'svaddp_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svaddp_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_f32,_m,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svaddp_f32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svaddp_f32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_f32,_x,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svtbx_f32' needs target feature sve2}}
-  // overload-error at +1 {{'svtbx' needs target feature sve2}}
+  // expected-error at +2 {{'svtbx_f32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_f32,,)(svundef_f32(), svundef_f32(), svundef_u32());
-  // expected-error at +2 {{'svminp_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_f32,_m,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svminp_f32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_f32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_f32,_x,)(pg, svundef_f32(), svundef_f32());
   // expected-error at +2 {{'svstnt1_scatter_u32base_f32' needs target feature sve2}}
   // overload-error at +1 {{'svstnt1_scatter' needs target feature sve2}}
@@ -4825,35 +4825,35 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error at +2 {{'svstnt1_scatter_u32base_index_f32' needs target feature sve2}}
   // overload-error at +1 {{'svstnt1_scatter_index' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1_scatter, _u32base, _index, _f32)(pg, svundef_u32(), i64, svundef_f32());
-  // expected-error at +2 {{'svmaxp_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxp_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_f32,_m,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svmaxp_f32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxp_f32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_f32,_x,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svmaxnmp_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxnmp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxnmp_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxnmp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxnmp,_f32,_m,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svmaxnmp_f32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxnmp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxnmp_f32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxnmp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxnmp,_f32,_x,)(pg, svundef_f32(), svundef_f32());
-  // expected-error at +2 {{'svwhilerw_f32' needs target feature sve2}}
-  // overload-error at +1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error at +2 {{'svwhilerw_f32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_f32,,)(const_f32_ptr, const_f32_ptr);
-  // expected-error at +2 {{'svcvtnt_f16_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtnt_f16_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtnt_f16_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtnt_f16_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtnt_f16,_f32,_m,)(svundef_f16(), pg, svundef_f32());
-  // expected-error at +2 {{'svcvtnt_f16_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtnt_f16_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtnt_f16_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtnt_f16_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtnt_f16,_f32,_x,)(svundef_f16(), pg, svundef_f32());
-  // expected-error at +2 {{'svwhilewr_f32' needs target feature sve2}}
-  // overload-error at +1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error at +2 {{'svwhilewr_f32' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_f32,,)(const_f32_ptr, const_f32_ptr);
-  // expected-error at +2 {{'svcvtlt_f64_f32_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtlt_f64_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtlt_f64_f32_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtlt_f64_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtlt_f64,_f32,_m,)(svundef_f64(), pg, svundef_f32());
-  // expected-error at +2 {{'svcvtlt_f64_f32_x' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtlt_f64_x' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtlt_f64_f32_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtlt_f64_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtlt_f64,_f32,_x,)(pg, svundef_f32());
   // expected-error at +2 {{'svldnt1_gather_u32base_f32' needs target feature sve2}}
   // overload-error at +1 {{'svldnt1_gather_f32' needs target feature sve2}}
@@ -4868,38 +4868,38 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // overload-error at +1 {{'svldnt1_gather_index_f32' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1_gather, _u32base, _index_f32, )(pg, svundef_u32(), i64);
 
-  // expected-error at +2 {{'svlogb_f64_z' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_z' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f64_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f64,_z,)(pg, svundef_f64());
-  // expected-error at +2 {{'svlogb_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_m' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f64,_m,)(svundef_s64(), pg, svundef_f64());
-  // expected-error at +2 {{'svlogb_f64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svlogb_x' needs target feature sve2}}
+  // expected-error at +2 {{'svlogb_f64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svlogb_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svlogb,_f64,_x,)(pg, svundef_f64());
-  // expected-error at +2 {{'svminnmp_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svminnmp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svminnmp_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminnmp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminnmp,_f64,_m,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svminnmp_f64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svminnmp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svminnmp_f64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminnmp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminnmp,_f64,_x,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svtbl2_f64' needs target feature sve2}}
-  // overload-error at +1 {{'svtbl2' needs target feature sve2}}
+  // expected-error at +2 {{'svtbl2_f64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbl2' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbl2,_f64,,)(svundef2_f64(), svundef_u64());
-  // expected-error at +2 {{'svaddp_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svaddp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svaddp_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_f64,_m,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svaddp_f64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svaddp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svaddp_f64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svaddp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svaddp,_f64,_x,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svtbx_f64' needs target feature sve2}}
-  // overload-error at +1 {{'svtbx' needs target feature sve2}}
+  // expected-error at +2 {{'svtbx_f64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svtbx' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svtbx,_f64,,)(svundef_f64(), svundef_f64(), svundef_u64());
-  // expected-error at +2 {{'svminp_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_f64,_m,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svminp_f64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svminp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svminp_f64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svminp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svminp,_f64,_x,)(pg, svundef_f64(), svundef_f64());
   // expected-error at +2 {{'svstnt1_scatter_u64base_f64' needs target feature sve2}}
   // overload-error at +1 {{'svstnt1_scatter' needs target feature sve2}}
@@ -4922,38 +4922,38 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error at +2 {{'svstnt1_scatter_u64base_index_f64' needs target feature sve2}}
   // overload-error at +1 {{'svstnt1_scatter_index' needs target feature sve2}}
   SVE_ACLE_FUNC(svstnt1_scatter, _u64base, _index, _f64)(pg, svundef_u64(), i64, svundef_f64());
-  // expected-error at +2 {{'svmaxp_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxp_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_f64,_m,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svmaxp_f64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxp_f64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxp,_f64,_x,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svmaxnmp_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxnmp_m' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxnmp_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxnmp_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxnmp,_f64,_m,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svmaxnmp_f64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svmaxnmp_x' needs target feature sve2}}
+  // expected-error at +2 {{'svmaxnmp_f64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svmaxnmp_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svmaxnmp,_f64,_x,)(pg, svundef_f64(), svundef_f64());
-  // expected-error at +2 {{'svwhilerw_f64' needs target feature sve2}}
-  // overload-error at +1 {{'svwhilerw' needs target feature sve2}}
+  // expected-error at +2 {{'svwhilerw_f64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svwhilerw' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilerw,_f64,,)(const_f64_ptr, const_f64_ptr);
-  // expected-error at +2 {{'svcvtnt_f32_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtnt_f32_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtnt_f32_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtnt_f32_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtnt_f32,_f64,_m,)(svundef_f32(), pg, svundef_f64());
-  // expected-error at +2 {{'svcvtnt_f32_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtnt_f32_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtnt_f32_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtnt_f32_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtnt_f32,_f64,_x,)(svundef_f32(), pg, svundef_f64());
-  // expected-error at +2 {{'svwhilewr_f64' needs target feature sve2}}
-  // overload-error at +1 {{'svwhilewr' needs target feature sve2}}
+  // expected-error at +2 {{'svwhilewr_f64' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svwhilewr' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svwhilewr,_f64,,)(const_f64_ptr, const_f64_ptr);
-  // expected-error at +2 {{'svcvtx_f32_f64_z' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtx_f32_z' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtx_f32_f64_z' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtx_f32_z' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtx_f32,_f64,_z,)(pg, svundef_f64());
-  // expected-error at +2 {{'svcvtx_f32_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtx_f32_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtx_f32_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtx_f32_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtx_f32,_f64,_m,)(svundef_f32(), pg, svundef_f64());
-  // expected-error at +2 {{'svcvtx_f32_f64_x' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtx_f32_x' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtx_f32_f64_x' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtx_f32_x' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtx_f32,_f64,_x,)(pg, svundef_f64());
   // expected-error at +2 {{'svldnt1_gather_u64base_f64' needs target feature sve2}}
   // overload-error at +1 {{'svldnt1_gather_f64' needs target feature sve2}}
@@ -4976,10 +4976,10 @@ void test(svbool_t pg, const int8_t *const_i8_ptr, const uint8_t *const_u8_ptr,
   // expected-error at +2 {{'svldnt1_gather_u64base_index_f64' needs target feature sve2}}
   // overload-error at +1 {{'svldnt1_gather_index_f64' needs target feature sve2}}
   SVE_ACLE_FUNC(svldnt1_gather, _u64base, _index_f64, )(pg, svundef_u64(), i64);
-  // expected-error at +2 {{'svcvtxnt_f32_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtxnt_f32_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtxnt_f32_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtxnt_f32_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_m,)(svundef_f32(), pg, svundef_f64());
-  // expected-error at +2 {{'svcvtxnt_f32_f64_m' needs target feature sve2}}
-  // overload-error at +1 {{'svcvtxnt_f32_m' needs target feature sve2}}
+  // expected-error at +2 {{'svcvtxnt_f32_f64_m' needs target feature sve2|sme}}
+  // overload-error at +1 {{'svcvtxnt_f32_m' needs target feature sve2|sme}}
   SVE_ACLE_FUNC(svcvtxnt_f32,_f64,_x,)(svundef_f32(), pg, svundef_f64());
 }
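
For reference, this is the kind of code the relaxed "sve2|sme" guard is intended to accept — a minimal sketch, not part of the patch, and the exact driver flags may differ (something like -march=armv8-a+sme with SVE disabled):

#include <arm_sve.h>

// Previously this required +sve2; with the relaxed guard it should be
// accepted when only +sme is available, because the __arm_streaming
// attribute puts the function in the mode the runtime check expects.
svuint64_t saturating_shift(svbool_t pg, svuint64_t v) __arm_streaming {
  return svqshl_n_u64_x(pg, v, 1);  // guarded by sve2|sme after this patch
}
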
diff --git a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp
index 60f5aa59e1055..4c8394dc81ad0 100644
--- a/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp
+++ b/clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_bfloat.cpp
@@ -14,16 +14,16 @@
 
 void test_bfloat(const bfloat16_t *const_bf16_ptr, svbfloat16_t bf16, svbfloat16x2_t bf16x2)
 {
-  // expected-error at +2 {{'svwhilerw_bf16' needs target feature sve2,bf16}}
-  // overload-error at +1 {{'svwhilerw' needs target feature sve2,bf16}}
+  // expected-error at +2 {{'svwhilerw_bf16' needs target feature (sve2,bf16)|sme}}
+  // overload-error at +1 {{'svwhilerw' needs target feature (sve2,bf16)|sme}}
   SVE_ACLE_FUNC(svwhilerw,_bf16,,)(const_bf16_ptr, const_bf16_ptr);
-  // expected-error at +2 {{'svtbx_bf16' needs target feature sve2,bf16}}
-  // overload-error at +1 {{'svtbx' needs target feature sve2,bf16}}
+  // expected-error at +2 {{'svtbx_bf16' needs target feature (sve2,bf16)|sme}}
+  // overload-error at +1 {{'svtbx' needs target feature (sve2,bf16)|sme}}
   SVE_ACLE_FUNC(svtbx,_bf16,,)(bf16, bf16, svundef_u16());
-  // expected-error at +2 {{'svtbl2_bf16' needs target feature sve2,bf16}}
-  // overload-error at +1 {{'svtbl2' needs target feature sve2,bf16}}
+  // expected-error at +2 {{'svtbl2_bf16' needs target feature (sve2,bf16)|sme}}
+  // overload-error at +1 {{'svtbl2' needs target feature (sve2,bf16)|sme}}
   SVE_ACLE_FUNC(svtbl2,_bf16,,)(bf16x2, svundef_u16());
-  // expected-error at +2 {{'svwhilewr_bf16' needs target feature sve2,bf16}}
-  // overload-error at +1 {{'svwhilewr' needs target feature sve2,bf16}}
+  // expected-error at +2 {{'svwhilewr_bf16' needs target feature (sve2,bf16)|sme}}
+  // overload-error at +1 {{'svwhilewr' needs target feature (sve2,bf16)|sme}}
   SVE_ACLE_FUNC(svwhilewr,_bf16,,)(const_bf16_ptr, const_bf16_ptr);
 }
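
The bfloat16 intrinsics get the compound guard, which the diagnostic now prints as "(sve2,bf16)|sme". A hedged sketch of what that permits, assuming +sme brings bf16 support along (FEAT_SME requires FEAT_BF16):

#include <arm_sve.h>

// Hypothetical example, not part of the patch: a two-vector table lookup
// on bfloat16 data inside a streaming function, built with +sme and
// without +sve2.
svbfloat16_t lookup(svbfloat16x2_t table, svuint16_t idx) __arm_streaming {
  return svtbl2_bf16(table, idx);
}
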


