[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)

Tue Dec 19 09:45:47 PST 2023

https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75596

>From 04a03eae3fcbdd57257ce3867615ec6be9d84e53 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 15 Dec 2023 12:18:53 +0000
Subject: [PATCH 1/3] [AArch64] Update target feature requirements of SVE
 bfloat instructions

According to the latest update of the ISA
https://developer.arm.com/documentation/ddi0602/2023-09/?lang=en
all of the affected instruction encodings now require

    (FEAT_SVE2 or FEAT_SME2) and FEAT_SVE_B16B16
---
 clang/include/clang/Basic/arm_sve.td          |  2 +-
 .../acle_sve2p1_bfadd.c                       | 11 ++--
 .../acle_sve2p1_bfmax.c                       | 11 ++--
 .../acle_sve2p1_bfmaxnm.c                     | 11 ++--
 .../acle_sve2p1_bfmin.c                       | 11 ++--
 .../acle_sve2p1_bfminnm.c                     | 11 ++--
 .../acle_sve2p1_bfmla.c                       | 11 ++--
 .../acle_sve2p1_bfmls.c                       | 11 ++--
 .../acle_sve2p1_bfmul.c                       | 11 ++--
 .../acle_sve2p1_bfsub.c                       | 11 ++--
 llvm/lib/Target/AArch64/AArch64.td            |  4 +-
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ++--
 llvm/test/MC/AArch64/SVE2p1/bfadd.s           | 43 ++++++++++------
 llvm/test/MC/AArch64/SVE2p1/bfclamp.s         | 32 ++++++++----
 llvm/test/MC/AArch64/SVE2p1/bfmax.s           | 34 ++++++++-----
 llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s         | 34 ++++++++-----
 llvm/test/MC/AArch64/SVE2p1/bfmin.s           | 34 ++++++++-----
 llvm/test/MC/AArch64/SVE2p1/bfminnm.s         | 34 ++++++++-----
 llvm/test/MC/AArch64/SVE2p1/bfmla.s           | 44 +++++++++-------
 llvm/test/MC/AArch64/SVE2p1/bfmls.s           | 45 +++++++++-------
 llvm/test/MC/AArch64/SVE2p1/bfmul.s           | 51 +++++++++++--------
 llvm/test/MC/AArch64/SVE2p1/bfsub.s           | 43 ++++++++++------
 22 files changed, 311 insertions(+), 198 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 98d7028eb28309..e84d6e5e4cc602 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in {
   def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>;
 }
 
-let TargetGuard = "sve2p1,b16b16" in {
+let TargetGuard = "(sve2|sme2),b16b16" in {
 defm SVMUL_BF  : SInstZPZZ<"svmul",  "b", "aarch64_sve_fmul",   "aarch64_sve_fmul_u">;
 defm SVADD_BF  : SInstZPZZ<"svadd",  "b", "aarch64_sve_fadd",   "aarch64_sve_fadd_u">;
 defm SVSUB_BF  : SInstZPZZ<"svsub",  "b", "aarch64_sve_fsub",   "aarch64_sve_fsub_u">;
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c
index 327c4f078872b3..a3026fee3f6d29 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c
index 0553b993622bde..4bdd4e50bbdb60 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c
index fbbafde686edba..5e8d8b55edbf18 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c
index bf774ee0cef661..f9829cd969eabb 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c
index cf00f0d504522b..ff2f0388cdc82b 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c
index 0e1532563f8bb8..63e7f12d69c5c2 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c
index b7d576ea01df6f..61a0ded321e2f2 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c
index 8b0de974f2473b..8c8df280dd93df 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c
index c5cdf7efa445b8..aad12fbe9b839a 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c
@@ -1,10 +1,11 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: aarch64-registered-target
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
-// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index c600bcaab2b3ea..234f983a81d541 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -783,7 +783,7 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
 def SVE2p1Unsupported : AArch64Unsupported;
 
 def SVE2Unsupported : AArch64Unsupported {
-  let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16,
+  let F = !listconcat([HasSVE2, HasSVE2orSME, HasSVE2orSME2, HasSSVE_FP8FMA, HasSMEF8F16,
                        HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
                        SVE2p1Unsupported.F);
 }
@@ -797,7 +797,7 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
 def SME2p1Unsupported : AArch64Unsupported;
 
 def SME2Unsupported : AArch64Unsupported {
-  let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
+  let F = !listconcat([HasSME2, HasSVE2orSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
                       HasSMEF8F16, HasSMEF8F32],
                       SME2p1Unsupported.F);
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 50527e08a06165..f68059889d0c51 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -4003,10 +4003,10 @@ def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>;
 } // End HasSVE2p1_or_HasSME2
 
 //===----------------------------------------------------------------------===//
-// SVE2.1 non-widening BFloat16 to BFloat16 instructions
+// Non-widening BFloat16 to BFloat16 instructions
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasSVE2p1, HasB16B16, UseExperimentalZeroingPseudos] in {
+let Predicates = [HasSVE2orSME2, HasB16B16, UseExperimentalZeroingPseudos] in {
 defm BFADD_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fadd>;
 defm BFSUB_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fsub>;
 defm BFMUL_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmul>;
@@ -4014,9 +4014,9 @@ defm BFMAXNM_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmaxnm>;
 defm BFMINNM_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fminnm>;
 defm BFMIN_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmin>;
 defm BFMAX_ZPZZ : sve2p1_bf_2op_p_zds_zeroing<int_aarch64_sve_fmax>;
-} //HasSVE2p1_or_HasSME2p1, HasB16B16, UseExperimentalZeroingPseudos
+} // HasSVE2orSME2, HasB16B16, UseExperimentalZeroingPseudos
 
-let Predicates = [HasSVE2p1, HasB16B16] in {
+let Predicates = [HasSVE2orSME2, HasB16B16] in {
 
 defm BFMLA_ZPmZZ : sve_fp_3op_p_zds_a_bf<0b00, "bfmla", "BFMLA_ZPZZZ", AArch64fmla_m1>;
 defm BFMLS_ZPmZZ : sve_fp_3op_p_zds_a_bf<0b01, "bfmls", "BFMLS_ZPZZZ", AArch64fmls_m1>;
@@ -4056,7 +4056,7 @@ defm BFMINNM_ZPZZ : sve2p1_bf_bin_pred_zds<AArch64fminnm_p>;
 defm BFMUL_ZZZI : sve2p1_fp_bfmul_by_indexed_elem<"bfmul", int_aarch64_sve_fmul_lane>;
 
 defm BFCLAMP_ZZZ : sve2p1_bfclamp<"bfclamp", int_aarch64_sve_fclamp>;
-} // End HasSVE2p1_or_HasSME2p1, HasB16B16
+} // End HasSVE2orSME2, HasB16B16
 
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfadd.s b/llvm/test/MC/AArch64/SVE2p1/bfadd.s
index a29f3e6af8ba48..b0d6733647ce81 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfadd.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfadd.s
@@ -1,14 +1,25 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 movprfx  z23.h, p3/m, z31.h
@@ -16,7 +27,7 @@ bfadd   z23.h, p3/m, z23.h, z13.h  // 01100101-00000000-10001101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfadd   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65008db7 <unknown>
 
 movprfx z23, z31
@@ -24,53 +35,53 @@ bfadd   z23.h, p3/m, z23.h, z13.h  // 01100101-00000000-10001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfadd   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65008db7 <unknown>
 
 bfadd   z0.h, p0/m, z0.h, z0.h  // 01100101-00000000-10000000-00000000
 // CHECK-INST: bfadd   z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x80,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65008000 <unknown>
 
 bfadd   z21.h, p5/m, z21.h, z10.h  // 01100101-00000000-10010101-01010101
 // CHECK-INST: bfadd   z21.h, p5/m, z21.h, z10.h
 // CHECK-ENCODING: [0x55,0x95,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65009555 <unknown>
 
 bfadd   z23.h, p3/m, z23.h, z13.h  // 01100101-00000000-10001101-10110111
 // CHECK-INST: bfadd   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65008db7 <unknown>
 
 bfadd   z31.h, p7/m, z31.h, z31.h  // 01100101-00000000-10011111-11111111
 // CHECK-INST: bfadd   z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x9f,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65009fff <unknown>
 
 bfadd   z0.h, z0.h, z0.h  // 01100101-00000000-00000000-00000000
 // CHECK-INST: bfadd   z0.h, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x00,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65000000 <unknown>
 
 bfadd   z21.h, z10.h, z21.h  // 01100101-00010101-00000001-01010101
 // CHECK-INST: bfadd   z21.h, z10.h, z21.h
 // CHECK-ENCODING: [0x55,0x01,0x15,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65150155 <unknown>
 
 bfadd   z23.h, z13.h, z8.h  // 01100101-00001000-00000001-10110111
 // CHECK-INST: bfadd   z23.h, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x01,0x08,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 650801b7 <unknown>
 
 bfadd   z31.h, z31.h, z31.h  // 01100101-00011111-00000011-11111111
 // CHECK-INST: bfadd   z31.h, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x03,0x1f,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 651f03ff <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfclamp.s b/llvm/test/MC/AArch64/SVE2p1/bfclamp.s
index aed96f3d91e987..93b0a3e49a0b75 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfclamp.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfclamp.s
@@ -1,46 +1,56 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 movprfx z23, z31
 bfclamp z23.h, z13.h, z8.h  // 01100100-00101000-00100101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfclamp z23.h, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x25,0x28,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 642825b7 <unknown>
 
 bfclamp z0.h, z0.h, z0.h  // 01100100-00100000-00100100-00000000
 // CHECK-INST: bfclamp z0.h, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x24,0x20,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64202400 <unknown>
 
 bfclamp z21.h, z10.h, z21.h  // 01100100-00110101-00100101-01010101
 // CHECK-INST: bfclamp z21.h, z10.h, z21.h
 // CHECK-ENCODING: [0x55,0x25,0x35,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64352555 <unknown>
 
 bfclamp z23.h, z13.h, z8.h  // 01100100-00101000-00100101-10110111
 // CHECK-INST: bfclamp z23.h, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x25,0x28,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 642825b7 <unknown>
 
 bfclamp z31.h, z31.h, z31.h  // 01100100-00111111-00100111-11111111
 // CHECK-INST: bfclamp z31.h, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x27,0x3f,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 643f27ff <unknown>
 
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmax.s b/llvm/test/MC/AArch64/SVE2p1/bfmax.s
index bf69c0a0406868..e6089f59c68a5e 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfmax.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfmax.s
@@ -1,23 +1,33 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 movprfx  z23.h, p3/m, z31.h
 bfmax   z23.h, p3/m, z23.h, z13.h  // 01100101-00000110-10001101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfmax   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x06,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65068db7 <unknown>
 
 movprfx z23, z31
@@ -25,29 +35,29 @@ bfmax   z23.h, p3/m, z23.h, z13.h  // 01100101-00000110-10001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfmax   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x06,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65068db7 <unknown>
 
 bfmax   z0.h, p0/m, z0.h, z0.h  // 01100101-00000110-10000000-00000000
 // CHECK-INST: bfmax   z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x80,0x06,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65068000 <unknown>
 
 bfmax   z21.h, p5/m, z21.h, z10.h  // 01100101-00000110-10010101-01010101
 // CHECK-INST: bfmax   z21.h, p5/m, z21.h, z10.h
 // CHECK-ENCODING: [0x55,0x95,0x06,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65069555 <unknown>
 
 bfmax   z23.h, p3/m, z23.h, z13.h  // 01100101-00000110-10001101-10110111
 // CHECK-INST: bfmax   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x06,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65068db7 <unknown>
 
 bfmax   z31.h, p7/m, z31.h, z31.h  // 01100101-00000110-10011111-11111111
 // CHECK-INST: bfmax   z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x9f,0x06,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65069fff <unknown>
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s
index 8e4ffc31218ab5..a4b0e9a3f8c3aa 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s
@@ -1,23 +1,33 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 movprfx  z23.h, p3/m, z31.h
 bfmaxnm z23.h, p3/m, z23.h, z13.h  // 01100101-00000100-10001101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x04,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65048db7 <unknown>
 
 movprfx z23, z31
@@ -25,30 +35,30 @@ bfmaxnm z23.h, p3/m, z23.h, z13.h  // 01100101-00000100-10001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x04,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65048db7 <unknown>
 
 bfmaxnm z0.h, p0/m, z0.h, z0.h  // 01100101-00000100-10000000-00000000
 // CHECK-INST: bfmaxnm z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x80,0x04,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65048000 <unknown>
 
 bfmaxnm z21.h, p5/m, z21.h, z10.h  // 01100101-00000100-10010101-01010101
 // CHECK-INST: bfmaxnm z21.h, p5/m, z21.h, z10.h
 // CHECK-ENCODING: [0x55,0x95,0x04,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65049555 <unknown>
 
 bfmaxnm z23.h, p3/m, z23.h, z13.h  // 01100101-00000100-10001101-10110111
 // CHECK-INST: bfmaxnm z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x04,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65048db7 <unknown>
 
 bfmaxnm z31.h, p7/m, z31.h, z31.h  // 01100101-00000100-10011111-11111111
 // CHECK-INST: bfmaxnm z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x9f,0x04,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65049fff <unknown>
 
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmin.s b/llvm/test/MC/AArch64/SVE2p1/bfmin.s
index 17bf50913271c5..2475143324ad76 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfmin.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfmin.s
@@ -1,23 +1,33 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 movprfx  z23.h, p3/m, z31.h
 bfmin   z23.h, p3/m, z23.h, z13.h  // 01100101-00000111-10001101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfmin   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x07,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65078db7 <unknown>
 
 movprfx z23, z31
@@ -25,30 +35,30 @@ bfmin   z23.h, p3/m, z23.h, z13.h  // 01100101-00000111-10001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfmin   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x07,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65078db7 <unknown>
 
 bfmin   z0.h, p0/m, z0.h, z0.h  // 01100101-00000111-10000000-00000000
 // CHECK-INST: bfmin   z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x80,0x07,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65078000 <unknown>
 
 bfmin   z21.h, p5/m, z21.h, z10.h  // 01100101-00000111-10010101-01010101
 // CHECK-INST: bfmin   z21.h, p5/m, z21.h, z10.h
 // CHECK-ENCODING: [0x55,0x95,0x07,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65079555 <unknown>
 
 bfmin   z23.h, p3/m, z23.h, z13.h  // 01100101-00000111-10001101-10110111
 // CHECK-INST: bfmin   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x07,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65078db7 <unknown>
 
 bfmin   z31.h, p7/m, z31.h, z31.h  // 01100101-00000111-10011111-11111111
 // CHECK-INST: bfmin   z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x9f,0x07,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65079fff <unknown>
 
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfminnm.s b/llvm/test/MC/AArch64/SVE2p1/bfminnm.s
index e0cd2adc675eea..6c4c9e473018e4 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfminnm.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfminnm.s
@@ -1,23 +1,33 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 movprfx  z23.h, p3/m, z31.h
 bfminnm z23.h, p3/m, z23.h, z13.h  // 01100101-00000101-10001101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfminnm z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x05,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65058db7 <unknown>
 
 movprfx z23, z31
@@ -25,30 +35,30 @@ bfminnm z23.h, p3/m, z23.h, z13.h  // 01100101-00000101-10001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfminnm z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x05,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65058db7 <unknown>
 
 bfminnm z0.h, p0/m, z0.h, z0.h  // 01100101-00000101-10000000-00000000
 // CHECK-INST: bfminnm z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x80,0x05,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65058000 <unknown>
 
 bfminnm z21.h, p5/m, z21.h, z10.h  // 01100101-00000101-10010101-01010101
 // CHECK-INST: bfminnm z21.h, p5/m, z21.h, z10.h
 // CHECK-ENCODING: [0x55,0x95,0x05,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65059555 <unknown>
 
 bfminnm z23.h, p3/m, z23.h, z13.h  // 01100101-00000101-10001101-10110111
 // CHECK-INST: bfminnm z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x05,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65058db7 <unknown>
 
 bfminnm z31.h, p7/m, z31.h, z31.h  // 01100101-00000101-10011111-11111111
 // CHECK-INST: bfminnm z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x9f,0x05,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65059fff <unknown>
 
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmla.s b/llvm/test/MC/AArch64/SVE2p1/bfmla.s
index a265eb8b71df92..1d22cbe7add967 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfmla.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfmla.s
@@ -1,47 +1,57 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 movprfx z23, z31
 bfmla   z23.h, z13.h, z0.h[5]  // 01100100-01101000-00001001-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfmla   z23.h, z13.h, z0.h[5]
 // CHECK-ENCODING: [0xb7,0x09,0x68,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 646809b7 <unknown>
 
 bfmla   z0.h, z0.h, z0.h[0]  // 01100100-00100000-00001000-00000000
 // CHECK-INST: bfmla   z0.h, z0.h, z0.h[0]
 // CHECK-ENCODING: [0x00,0x08,0x20,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64200800 <unknown>
 
 bfmla   z21.h, z10.h, z5.h[6]  // 01100100-01110101-00001001-01010101
 // CHECK-INST: bfmla   z21.h, z10.h, z5.h[6]
 // CHECK-ENCODING: [0x55,0x09,0x75,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64750955 <unknown>
 
 bfmla   z23.h, z13.h, z0.h[5]  // 01100100-01101000-00001001-10110111
 // CHECK-INST: bfmla   z23.h, z13.h, z0.h[5]
 // CHECK-ENCODING: [0xb7,0x09,0x68,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 646809b7 <unknown>
 
 bfmla   z31.h, z31.h, z7.h[7]  // 01100100-01111111-00001011-11111111
 // CHECK-INST: bfmla   z31.h, z31.h, z7.h[7]
 // CHECK-ENCODING: [0xff,0x0b,0x7f,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 647f0bff <unknown>
 
 
@@ -50,7 +60,7 @@ bfmla   z23.h, p3/m, z13.h, z8.h  // 01100101-00101000-00001101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfmla   z23.h, p3/m, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x0d,0x28,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65280db7 <unknown>
 
 movprfx z23, z31
@@ -58,30 +68,30 @@ bfmla   z23.h, p3/m, z13.h, z8.h  // 01100101-00101000-00001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfmla   z23.h, p3/m, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x0d,0x28,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65280db7 <unknown>
 
 bfmla   z0.h, p0/m, z0.h, z0.h  // 01100101-00100000-00000000-00000000
 // CHECK-INST: bfmla   z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x00,0x20,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65200000 <unknown>
 
 bfmla   z21.h, p5/m, z10.h, z21.h  // 01100101-00110101-00010101-01010101
 // CHECK-INST: bfmla   z21.h, p5/m, z10.h, z21.h
 // CHECK-ENCODING: [0x55,0x15,0x35,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65351555 <unknown>
 
 bfmla   z23.h, p3/m, z13.h, z8.h  // 01100101-00101000-00001101-10110111
 // CHECK-INST: bfmla   z23.h, p3/m, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x0d,0x28,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65280db7 <unknown>
 
 bfmla   z31.h, p7/m, z31.h, z31.h  // 01100101-00111111-00011111-11111111
 // CHECK-INST: bfmla   z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x1f,0x3f,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 653f1fff <unknown>
 
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmls.s b/llvm/test/MC/AArch64/SVE2p1/bfmls.s
index 56713e74adf8f0..7a27e3dc46af28 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfmls.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfmls.s
@@ -1,14 +1,25 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 
@@ -17,31 +28,31 @@ bfmls   z23.h, z13.h, z0.h[5]  // 01100100-01101000-00001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfmls   z23.h, z13.h, z0.h[5]
 // CHECK-ENCODING: [0xb7,0x0d,0x68,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64680db7 <unknown>
 
 bfmls   z0.h, z0.h, z0.h[0]  // 01100100-00100000-00001100-00000000
 // CHECK-INST: bfmls   z0.h, z0.h, z0.h[0]
 // CHECK-ENCODING: [0x00,0x0c,0x20,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64200c00 <unknown>
 
 bfmls   z21.h, z10.h, z5.h[6]  // 01100100-01110101-00001101-01010101
 // CHECK-INST: bfmls   z21.h, z10.h, z5.h[6]
 // CHECK-ENCODING: [0x55,0x0d,0x75,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64750d55 <unknown>
 
 bfmls   z23.h, z13.h, z0.h[5]  // 01100100-01101000-00001101-10110111
 // CHECK-INST: bfmls   z23.h, z13.h, z0.h[5]
 // CHECK-ENCODING: [0xb7,0x0d,0x68,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64680db7 <unknown>
 
 bfmls   z31.h, z31.h, z7.h[7]  // 01100100-01111111-00001111-11111111
 // CHECK-INST: bfmls   z31.h, z31.h, z7.h[7]
 // CHECK-ENCODING: [0xff,0x0f,0x7f,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 647f0fff <unknown>
 
 
@@ -50,7 +61,7 @@ bfmls   z23.h, p3/m, z13.h, z8.h  // 01100101-00101000-00101101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfmls   z23.h, p3/m, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x2d,0x28,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65282db7 <unknown>
 
 movprfx z23, z31
@@ -58,30 +69,30 @@ bfmls   z23.h, p3/m, z13.h, z8.h  // 01100101-00101000-00101101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfmls   z23.h, p3/m, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x2d,0x28,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65282db7 <unknown>
 
 bfmls   z0.h, p0/m, z0.h, z0.h  // 01100101-00100000-00100000-00000000
 // CHECK-INST: bfmls   z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x20,0x20,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65202000 <unknown>
 
 bfmls   z21.h, p5/m, z10.h, z21.h  // 01100101-00110101-00110101-01010101
 // CHECK-INST: bfmls   z21.h, p5/m, z10.h, z21.h
 // CHECK-ENCODING: [0x55,0x35,0x35,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65353555 <unknown>
 
 bfmls   z23.h, p3/m, z13.h, z8.h  // 01100101-00101000-00101101-10110111
 // CHECK-INST: bfmls   z23.h, p3/m, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x2d,0x28,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65282db7 <unknown>
 
 bfmls   z31.h, p7/m, z31.h, z31.h  // 01100101-00111111-00111111-11111111
 // CHECK-INST: bfmls   z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x3f,0x3f,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 653f3fff <unknown>
 
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfmul.s b/llvm/test/MC/AArch64/SVE2p1/bfmul.s
index 62e7d892468b6f..593eb4aad2e786 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfmul.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfmul.s
@@ -1,38 +1,49 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 bfmul   z0.h, z0.h, z0.h[0]  // 01100100-00100000-00101000-00000000
 // CHECK-INST: bfmul   z0.h, z0.h, z0.h[0]
 // CHECK-ENCODING: [0x00,0x28,0x20,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64202800 <unknown>
 
 bfmul   z21.h, z10.h, z5.h[6]  // 01100100-01110101-00101001-01010101
 // CHECK-INST: bfmul   z21.h, z10.h, z5.h[6]
 // CHECK-ENCODING: [0x55,0x29,0x75,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 64752955 <unknown>
 
 bfmul   z23.h, z13.h, z0.h[5]  // 01100100-01101000-00101001-10110111
 // CHECK-INST: bfmul   z23.h, z13.h, z0.h[5]
 // CHECK-ENCODING: [0xb7,0x29,0x68,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 646829b7 <unknown>
 
 bfmul   z31.h, z31.h, z7.h[7]  // 01100100-01111111-00101011-11111111
 // CHECK-INST: bfmul   z31.h, z31.h, z7.h[7]
 // CHECK-ENCODING: [0xff,0x2b,0x7f,0x64]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 647f2bff <unknown>
 
 movprfx  z23.h, p3/m, z31.h
@@ -40,7 +51,7 @@ bfmul   z23.h, p3/m, z23.h, z13.h  // 01100101-00000010-10001101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfmul   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x02,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65028db7 <unknown>
 
 movprfx z23, z31
@@ -48,54 +59,54 @@ bfmul   z23.h, p3/m, z23.h, z13.h  // 01100101-00000010-10001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfmul   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x02,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65028db7 <unknown>
 
 bfmul   z0.h, p0/m, z0.h, z0.h  // 01100101-00000010-10000000-00000000
 // CHECK-INST: bfmul   z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x80,0x02,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65028000 <unknown>
 
 bfmul   z21.h, p5/m, z21.h, z10.h  // 01100101-00000010-10010101-01010101
 // CHECK-INST: bfmul   z21.h, p5/m, z21.h, z10.h
 // CHECK-ENCODING: [0x55,0x95,0x02,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65029555 <unknown>
 
 bfmul   z23.h, p3/m, z23.h, z13.h  // 01100101-00000010-10001101-10110111
 // CHECK-INST: bfmul   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x02,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65028db7 <unknown>
 
 bfmul   z31.h, p7/m, z31.h, z31.h  // 01100101-00000010-10011111-11111111
 // CHECK-INST: bfmul   z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x9f,0x02,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65029fff <unknown>
 
 bfmul   z0.h, z0.h, z0.h  // 01100101-00000000-00001000-00000000
 // CHECK-INST: bfmul   z0.h, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x08,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65000800 <unknown>
 
 bfmul   z21.h, z10.h, z21.h  // 01100101-00010101-00001001-01010101
 // CHECK-INST: bfmul   z21.h, z10.h, z21.h
 // CHECK-ENCODING: [0x55,0x09,0x15,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65150955 <unknown>
 
 bfmul   z23.h, z13.h, z8.h  // 01100101-00001000-00001001-10110111
 // CHECK-INST: bfmul   z23.h, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x09,0x08,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 650809b7 <unknown>
 
 bfmul   z31.h, z31.h, z31.h  // 01100101-00011111-00001011-11111111
 // CHECK-INST: bfmul   z31.h, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x0b,0x1f,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 651f0bff <unknown>
 
diff --git a/llvm/test/MC/AArch64/SVE2p1/bfsub.s b/llvm/test/MC/AArch64/SVE2p1/bfsub.s
index 66590a72ed6b4c..5eae056ca46e7e 100644
--- a/llvm/test/MC/AArch64/SVE2p1/bfsub.s
+++ b/llvm/test/MC/AArch64/SVE2p1/bfsub.s
@@ -1,14 +1,25 @@
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 // RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
 // RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
-// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2p1,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
-// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sve2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2,+b16b16 < %s \
 // RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
-// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1,+b16b16 < %s \
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2,+b16b16 < %s \
 // RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
-// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2p1,+b16b16 -disassemble -show-encoding \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sve2,+b16b16 -disassemble -show-encoding \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --no-print-imm-hex --mattr=+sme2,+b16b16 - | FileCheck %s --check-prefix=CHECK-INST
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+b16b16 < %s \
+// RUN:        | llvm-objdump -d --mattr=-b16b16 - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+b16b16 < %s \
+// RUN:        | sed '/.text/d' | sed 's/.*encoding: //g' \
+// RUN:        | llvm-mc -triple=aarch64 -mattr=+sme2,+b16b16 -disassemble -show-encoding \
 // RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 
 movprfx  z23.h, p3/m, z31.h
@@ -16,7 +27,7 @@ bfsub   z23.h, p3/m, z23.h, z13.h  // 01100101-00000001-10001101-10110111
 // CHECK-INST:  movprfx  z23.h, p3/m, z31.h
 // CHECK-INST: bfsub   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x01,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65018db7 <unknown>
 
 movprfx z23, z31
@@ -24,53 +35,53 @@ bfsub   z23.h, p3/m, z23.h, z13.h  // 01100101-00000001-10001101-10110111
 // CHECK-INST:  movprfx z23, z31
 // CHECK-INST: bfsub   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x01,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65018db7 <unknown>
 
 bfsub   z0.h, p0/m, z0.h, z0.h  // 01100101-00000001-10000000-00000000
 // CHECK-INST: bfsub   z0.h, p0/m, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x80,0x01,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65018000 <unknown>
 
 bfsub   z21.h, p5/m, z21.h, z10.h  // 01100101-00000001-10010101-01010101
 // CHECK-INST: bfsub   z21.h, p5/m, z21.h, z10.h
 // CHECK-ENCODING: [0x55,0x95,0x01,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65019555 <unknown>
 
 bfsub   z23.h, p3/m, z23.h, z13.h  // 01100101-00000001-10001101-10110111
 // CHECK-INST: bfsub   z23.h, p3/m, z23.h, z13.h
 // CHECK-ENCODING: [0xb7,0x8d,0x01,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65018db7 <unknown>
 
 bfsub   z31.h, p7/m, z31.h, z31.h  // 01100101-00000001-10011111-11111111
 // CHECK-INST: bfsub   z31.h, p7/m, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x9f,0x01,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65019fff <unknown>
 
 bfsub   z0.h, z0.h, z0.h  // 01100101-00000000-00000100-00000000
 // CHECK-INST: bfsub   z0.h, z0.h, z0.h
 // CHECK-ENCODING: [0x00,0x04,0x00,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65000400 <unknown>
 
 bfsub   z21.h, z10.h, z21.h  // 01100101-00010101-00000101-01010101
 // CHECK-INST: bfsub   z21.h, z10.h, z21.h
 // CHECK-ENCODING: [0x55,0x05,0x15,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 65150555 <unknown>
 
 bfsub   z23.h, z13.h, z8.h  // 01100101-00001000-00000101-10110111
 // CHECK-INST: bfsub   z23.h, z13.h, z8.h
 // CHECK-ENCODING: [0xb7,0x05,0x08,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 650805b7 <unknown>
 
 bfsub   z31.h, z31.h, z31.h  // 01100101-00011111-00000111-11111111
 // CHECK-INST: bfsub   z31.h, z31.h, z31.h
 // CHECK-ENCODING: [0xff,0x07,0x1f,0x65]
-// CHECK-ERROR: instruction requires: b16b16 sve2p1
+// CHECK-ERROR: instruction requires: b16b16 sve2 or sme2
 // CHECK-UNKNOWN: 651f07ff <unknown>

>From ddae1503418032a4e2597cbdff44ff1d186f6932 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Mon, 18 Dec 2023 18:24:44 +0000
Subject: [PATCH 2/3] [fixup] Refactor a tiny bit in the A64FX sched model and
 remove a stale comment

---
 clang/include/clang/Basic/arm_sve.td         | 2 +-
 llvm/lib/Target/AArch64/AArch64SchedA64FX.td | 8 ++------
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index e84d6e5e4cc602..07e75e60fedb67 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2100,7 +2100,7 @@ def SVMLA_LANE_BF  : SInst<"svmla_lane[_{d}]",  "ddddi",  "b", MergeNone, "aarch
 def SVMLS_LANE_BF  : SInst<"svmls_lane[_{d}]",  "ddddi",  "b", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
 def SVMUL_LANE_BF  : SInst<"svmul_lane[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_fmul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
 def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_fclamp", [], []>;
-} //sve2p1,b16b16
+}
 
 // SME2
 
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
index 65b97ff6956a11..b3d8c999c1f420 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA64FX.td
@@ -20,12 +20,8 @@ def A64FXModel : SchedMachineModel {
   let PostRAScheduler       =   1; // Using PostRA sched.
   let CompleteModel         =   1;
 
-  list<Predicate> UnsupportedFeatures =
-    [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
-     HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
-     HasSVE2p1_or_HasSME2p1, HasSMEF16F16, HasSSVE_FP8FMA, HasSMEF8F16, HasSMEF8F32,
-     HasSMEFA64];
-
+  list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F,
+                                                    [HasMTE, HasMatMulInt8, HasBF16, HasPAuth]);
   let FullInstRWOverlapCheck = 0;
 }
 

>From 9ae596b1ed1b4e0b1720317e84c90e69d79f6007 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 19 Dec 2023 17:43:22 +0000
Subject: [PATCH 3/3] [fixup] Add the streaming compatible flag to definitions
 and tests

---
 clang/include/clang/Basic/arm_sve.td          | 26 +++++++++----------
 .../acle_sve2p1_bfadd.c                       | 12 ++++-----
 .../acle_sve2p1_bfmax.c                       | 12 ++++-----
 .../acle_sve2p1_bfmaxnm.c                     | 12 ++++-----
 .../acle_sve2p1_bfmin.c                       | 12 ++++-----
 .../acle_sve2p1_bfminnm.c                     | 12 ++++-----
 .../acle_sve2p1_bfmla.c                       | 12 ++++-----
 .../acle_sve2p1_bfmls.c                       | 12 ++++-----
 .../acle_sve2p1_bfmul.c                       | 12 ++++-----
 .../acle_sve2p1_bfsub.c                       | 12 ++++-----
 10 files changed, 67 insertions(+), 67 deletions(-)

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 07e75e60fedb67..04bf7acdeba799 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2087,19 +2087,19 @@ let TargetGuard = "sve2p1|sme2" in {
 }
 
 let TargetGuard = "(sve2|sme2),b16b16" in {
-defm SVMUL_BF  : SInstZPZZ<"svmul",  "b", "aarch64_sve_fmul",   "aarch64_sve_fmul_u">;
-defm SVADD_BF  : SInstZPZZ<"svadd",  "b", "aarch64_sve_fadd",   "aarch64_sve_fadd_u">;
-defm SVSUB_BF  : SInstZPZZ<"svsub",  "b", "aarch64_sve_fsub",   "aarch64_sve_fsub_u">;
-defm SVMAXNM_BF  : SInstZPZZ<"svmaxnm","b", "aarch64_sve_fmaxnm", "aarch64_sve_fmaxnm_u">;
-defm SVMINNM_BF  : SInstZPZZ<"svminnm","b", "aarch64_sve_fminnm", "aarch64_sve_fminnm_u">;
-defm SVMAX_BF    : SInstZPZZ<"svmax",  "b", "aarch64_sve_fmax",   "aarch64_sve_fmax_u">;
-defm SVMIN_BF    : SInstZPZZ<"svmin",  "b", "aarch64_sve_fmin",   "aarch64_sve_fmin_u">;
-defm SVMLA_BF  : SInstZPZZZ<"svmla",  "b", "aarch64_sve_fmla",  "aarch64_sve_fmla_u", []>;
-defm SVMLS_BF  : SInstZPZZZ<"svmls",  "b", "aarch64_sve_fmls",  "aarch64_sve_fmls_u", []>;
-def SVMLA_LANE_BF  : SInst<"svmla_lane[_{d}]",  "ddddi",  "b", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-def SVMLS_LANE_BF  : SInst<"svmls_lane[_{d}]",  "ddddi",  "b", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
-def SVMUL_LANE_BF  : SInst<"svmul_lane[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_fmul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
-def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_fclamp", [], []>;
+defm SVMUL_BF  : SInstZPZZ<"svmul",  "b", "aarch64_sve_fmul",   "aarch64_sve_fmul_u", [IsStreamingCompatible]>;
+defm SVADD_BF  : SInstZPZZ<"svadd",  "b", "aarch64_sve_fadd",   "aarch64_sve_fadd_u", [IsStreamingCompatible]>;
+defm SVSUB_BF  : SInstZPZZ<"svsub",  "b", "aarch64_sve_fsub",   "aarch64_sve_fsub_u", [IsStreamingCompatible]>;
+defm SVMAXNM_BF  : SInstZPZZ<"svmaxnm","b", "aarch64_sve_fmaxnm", "aarch64_sve_fmaxnm_u", [IsStreamingCompatible]>;
+defm SVMINNM_BF  : SInstZPZZ<"svminnm","b", "aarch64_sve_fminnm", "aarch64_sve_fminnm_u", [IsStreamingCompatible]>;
+defm SVMAX_BF    : SInstZPZZ<"svmax",  "b", "aarch64_sve_fmax",   "aarch64_sve_fmax_u", [IsStreamingCompatible]>;
+defm SVMIN_BF    : SInstZPZZ<"svmin",  "b", "aarch64_sve_fmin",   "aarch64_sve_fmin_u", [IsStreamingCompatible]>;
+defm SVMLA_BF  : SInstZPZZZ<"svmla",  "b", "aarch64_sve_fmla",  "aarch64_sve_fmla_u", [IsStreamingCompatible]>;
+defm SVMLS_BF  : SInstZPZZZ<"svmls",  "b", "aarch64_sve_fmls",  "aarch64_sve_fmls_u", [IsStreamingCompatible]>;
+def SVMLA_LANE_BF  : SInst<"svmla_lane[_{d}]",  "ddddi",  "b", MergeNone, "aarch64_sve_fmla_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SVMLS_LANE_BF  : SInst<"svmls_lane[_{d}]",  "ddddi",  "b", MergeNone, "aarch64_sve_fmls_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
+def SVMUL_LANE_BF  : SInst<"svmul_lane[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_fmul_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>;
+def SVFCLAMP_BF   : SInst<"svclamp[_{d}]", "dddd", "b", MergeNone, "aarch64_sve_fclamp", [IsStreamingCompatible], []>;
 }
 
 // SME2
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c
index a3026fee3f6d29..2af8995b6fc9b9 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svadd, _bf16, _m)(pg, op1, op2);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svadd_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svadd, _bf16, _z)(pg, op1, op2);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svadd_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svadd, _bf16, _x)(pg, op1, op2);
 }
@@ -84,7 +84,7 @@ svbfloat16_t test_svadd_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svadd, _n_bf16, _m)(pg, op1, op2);
 }
@@ -107,7 +107,7 @@ svbfloat16_t test_svadd_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svadd, _n_bf16, _z)(pg, op1, op2);
 }
@@ -128,7 +128,7 @@ svbfloat16_t test_svadd_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fadd.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svadd_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svadd_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svadd, _n_bf16, _x)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c
index 4bdd4e50bbdb60..b0534753b1be93 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmax.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmax, _bf16, _m)(pg, op1, op2);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svmax_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmax, _bf16, _z)(pg, op1, op2);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svmax_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmax, _bf16, _x)(pg, op1, op2);
 }
@@ -85,7 +85,7 @@ svbfloat16_t test_svmax_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmax, _n_bf16, _m)(pg, op1, op2);
 }
@@ -108,7 +108,7 @@ svbfloat16_t test_svmax_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svmax, _n_bf16, _z)(pg, op1, op2);
 }
@@ -129,7 +129,7 @@ svbfloat16_t test_svmax_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmax.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmax_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmax_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svmax, _n_bf16, _x)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c
index 5e8d8b55edbf18..ddf0a5711bab92 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmaxnm.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmaxnm, _bf16, _m)(pg, op1, op2);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svmaxnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmaxnm, _bf16, _z)(pg, op1, op2);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svmaxnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmaxnm, _bf16, _x)(pg, op1, op2);
 }
@@ -85,7 +85,7 @@ svbfloat16_t test_svmaxnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _m)(pg, op1, op2);
 }
@@ -108,7 +108,7 @@ svbfloat16_t test_svmaxnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _z)(pg, op1, op2);
 }
@@ -129,7 +129,7 @@ svbfloat16_t test_svmaxnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmaxnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmaxnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmaxnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svmaxnm, _n_bf16, _x)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c
index f9829cd969eabb..42d29e6dbc3954 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmin.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmin, _bf16, _m)(pg, op1, op2);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svmin_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmin, _bf16, _z)(pg, op1, op2);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svmin_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmin, _bf16, _x)(pg, op1, op2);
 }
@@ -85,7 +85,7 @@ svbfloat16_t test_svmin_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmin, _n_bf16, _m)(pg, op1, op2);
 }
@@ -108,7 +108,7 @@ svbfloat16_t test_svmin_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svmin, _n_bf16, _z)(pg, op1, op2);
 }
@@ -129,7 +129,7 @@ svbfloat16_t test_svmin_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmin.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmin_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmin_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svmin, _n_bf16, _x)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c
index ff2f0388cdc82b..27d85374aadcd3 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfminnm.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svminnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svminnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svminnm, _bf16, _m)(pg, op1, op2);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svminnm_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svminnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svminnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svminnm, _bf16, _z)(pg, op1, op2);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svminnm_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svminnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svminnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svminnm, _bf16, _x)(pg, op1, op2);
 }
@@ -85,7 +85,7 @@ svbfloat16_t test_svminnm_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svminnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svminnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svminnm, _n_bf16, _m)(pg, op1, op2);
 }
@@ -108,7 +108,7 @@ svbfloat16_t test_svminnm_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svminnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svminnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svminnm, _n_bf16, _z)(pg, op1, op2);
 }
@@ -129,7 +129,7 @@ svbfloat16_t test_svminnm_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fminnm.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svminnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svminnm_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svminnm, _n_bf16, _x)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c
index 63e7f12d69c5c2..4928147767d80d 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmla.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmla_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3)
+svbfloat16_t test_svmla_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmla, _bf16, _m)(pg, op1, op2, op3);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svmla_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2,
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmla_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3)
+svbfloat16_t test_svmla_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmla, _bf16, _z)(pg, op1, op2, op3);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svmla_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2,
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmla_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3)
+svbfloat16_t test_svmla_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmla, _bf16, _x)(pg, op1, op2, op3);
 }
@@ -84,7 +84,7 @@ svbfloat16_t test_svmla_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2,
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmla_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3)
+svbfloat16_t test_svmla_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmla, _n_bf16, _m)(pg, op1, op2, op3);
 }
@@ -107,7 +107,7 @@ svbfloat16_t test_svmla_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmla_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3)
+svbfloat16_t test_svmla_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmla, _n_bf16, _z)(pg, op1, op2, op3);
 }
@@ -128,7 +128,7 @@ svbfloat16_t test_svmla_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmla.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmla_n_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3)
+svbfloat16_t test_svmla_n_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmla, _n_bf16, _x)(pg, op1, op2, op3);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c
index 61a0ded321e2f2..e952d1b17ae8cd 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmls.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmls_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3)
+svbfloat16_t test_svmls_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmls, _bf16, _m)(pg, op1, op2, op3);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svmls_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2,
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmls_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3)
+svbfloat16_t test_svmls_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmls, _bf16, _z)(pg, op1, op2, op3);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svmls_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2,
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[OP3:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmls_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3)
+svbfloat16_t test_svmls_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, svbfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmls, _bf16, _x)(pg, op1, op2, op3);
 }
@@ -84,7 +84,7 @@ svbfloat16_t test_svmls_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2,
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmls_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3)
+svbfloat16_t test_svmls_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmls, _n_bf16, _m)(pg, op1, op2, op3);
 }
@@ -107,7 +107,7 @@ svbfloat16_t test_svmls_n_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmls_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3)
+svbfloat16_t test_svmls_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmls, _n_bf16, _z)(pg, op1, op2, op3);
 }
@@ -128,7 +128,7 @@ svbfloat16_t test_svmls_n_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmls.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmls_n_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3)
+svbfloat16_t test_svmls_n_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2, bfloat16_t op3) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmls, _n_bf16, _x)(pg, op1, op2, op3);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c
index 8c8df280dd93df..078ea58408ad95 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmul.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmul_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmul_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmul, _bf16, _m)(pg, op1, op2);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svmul_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmul_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmul_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmul, _bf16, _z)(pg, op1, op2);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svmul_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmul_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svmul_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmul, _bf16, _x)(pg, op1, op2);
 }
@@ -85,7 +85,7 @@ svbfloat16_t test_svmul_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmul_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmul_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svmul, _n_bf16, _m)(pg, op1, op2);
 }
@@ -108,7 +108,7 @@ svbfloat16_t test_svmul_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svmul_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmul_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svmul, _n_bf16, _z)(pg, op1, op2);
 }
@@ -129,7 +129,7 @@ svbfloat16_t test_svmul_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fmul.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svmul_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svmul_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svmul, _n_bf16, _x)(pg, op1, op2);
 }
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c
index aad12fbe9b839a..442562e9ed20a5 100644
--- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfsub.c
@@ -27,7 +27,7 @@
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svsub_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svsub_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svsub, _bf16, _m)(pg, op1, op2);
 }
@@ -46,7 +46,7 @@ svbfloat16_t test_svsub_bf16_m(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svsub_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svsub_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svsub, _bf16, _z)(pg, op1, op2);
 }
@@ -63,7 +63,7 @@ svbfloat16_t test_svsub_bf16_z(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[OP2:%.*]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svsub_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
+svbfloat16_t test_svsub_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svsub, _bf16, _x)(pg, op1, op2);
 }
@@ -85,7 +85,7 @@ svbfloat16_t test_svsub_bf16_x(svbool_t pg, svbfloat16_t op1, svbfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svsub_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svsub_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
   return SVE_ACLE_FUNC(svsub, _n_bf16, _m)(pg, op1, op2);
 }
@@ -108,7 +108,7 @@ svbfloat16_t test_svsub_bf16_n_m(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[TMP1]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP2]]
 //
-svbfloat16_t test_svsub_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svsub_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svsub, _n_bf16, _z)(pg, op1, op2);
 }
@@ -129,7 +129,7 @@ svbfloat16_t test_svsub_bf16_n_z(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
 // CPP-CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.fsub.u.nxv8bf16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x bfloat> [[OP1:%.*]], <vscale x 8 x bfloat> [[DOTSPLAT]])
 // CPP-CHECK-NEXT:    ret <vscale x 8 x bfloat> [[TMP1]]
 //
-svbfloat16_t test_svsub_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2)
+svbfloat16_t test_svsub_bf16_n_x(svbool_t pg, svbfloat16_t op1, bfloat16_t op2) __arm_streaming_compatible
 {
    return SVE_ACLE_FUNC(svsub, _n_bf16, _x)(pg, op1, op2);
 }