[clang] [llvm] [AArch64] Introduce new armv9.6 features (PR #111677)

Fri Oct 11 05:57:33 PDT 2024

https://github.com/Lukacma updated https://github.com/llvm/llvm-project/pull/111677

>From efabe14befb5c35ae35826d79af0b64756cb3e9b Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 9 Oct 2024 10:52:42 +0000
Subject: [PATCH 1/2] [AArch64] Introduce new armv9.6 features

---
 .../print-supported-extensions-aarch64.c      |  11 ++
 llvm/lib/Target/AArch64/AArch64Features.td    |  37 +++++-
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  35 +++++-
 .../TargetParser/TargetParserTest.cpp         | 119 ++++++++++++++++--
 4 files changed, 191 insertions(+), 11 deletions(-)

diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index e6247307c7219f..c582154bd0370d 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -8,6 +8,7 @@
 // CHECK-NEXT:     bf16                FEAT_BF16                                              Enable BFloat16 Extension
 // CHECK-NEXT:     brbe                FEAT_BRBE                                              Enable Branch Record Buffer Extension
 // CHECK-NEXT:     bti                 FEAT_BTI                                               Enable Branch Target Identification
+// CHECK-NEXT:     cmpbr               FEAT_CMPBR                                             Enable A64 base compare and branch instructions
 // CHECK-NEXT:     fcma                FEAT_FCMA                                              Enable Armv8.3-A Floating-point complex number support
 // CHECK-NEXT:     cpa                 FEAT_CPA                                               Enable Armv9.5-A Checked Pointer Arithmetic
 // CHECK-NEXT:     crc                 FEAT_CRC32                                             Enable Armv8.0-A CRC-32 checksum instructions
@@ -18,6 +19,8 @@
 // CHECK-NEXT:     dotprod             FEAT_DotProd                                           Enable dot product support
 // CHECK-NEXT:     f32mm               FEAT_F32MM                                             Enable Matrix Multiply FP32 Extension
 // CHECK-NEXT:     f64mm               FEAT_F64MM                                             Enable Matrix Multiply FP64 Extension
+// CHECK-NEXT:     f8f16mm             FEAT_F8F16MM                                           Enable FP8 to Half-Precision Matrix Multiplication
+// CHECK-NEXT:     f8f32mm             FEAT_F8F32MM                                           Enable FP8 to Single-Precision Matrix Multiplication
 // CHECK-NEXT:     faminmax            FEAT_FAMINMAX                                          Enable FAMIN and FAMAX instructions
 // CHECK-NEXT:     flagm               FEAT_FlagM                                             Enable Armv8.4-A Flag Manipulation instructions
 // CHECK-NEXT:     fp                  FEAT_FP                                                Enable Armv8.0-A Floating Point Extensions
@@ -26,6 +29,7 @@
 // CHECK-NEXT:     fp8dot2             FEAT_FP8DOT2                                           Enable FP8 2-way dot instructions
 // CHECK-NEXT:     fp8dot4             FEAT_FP8DOT4                                           Enable FP8 4-way dot instructions
 // CHECK-NEXT:     fp8fma              FEAT_FP8FMA                                            Enable Armv9.5-A FP8 multiply-add instructions
+// CHECK-NEXT:     fprcvt              FEAT_FPRCVT                                            Enable A64 base convert instructions for SIMD&FP scalar register operands of different input and output sizes
 // CHECK-NEXT:     fp16                FEAT_FP16                                              Enable half-precision floating-point data processing
 // CHECK-NEXT:     gcs                 FEAT_GCS                                               Enable Armv9.4-A Guarded Call Stack Extension
 // CHECK-NEXT:     hbc                 FEAT_HBC                                               Enable Armv8.8-A Hinted Conditional Branches Extension
@@ -35,6 +39,7 @@
 // CHECK-NEXT:     ls64                FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA              Enable Armv8.7-A LD64B/ST64B Accelerator Extension
 // CHECK-NEXT:     lse                 FEAT_LSE                                               Enable Armv8.1-A Large System Extension (LSE) atomic instructions
 // CHECK-NEXT:     lse128              FEAT_LSE128                                            Enable Armv9.4-A 128-bit Atomic instructions
+// CHECK-NEXT:     lsfe                FEAT_LSFE                                              Enable A64 base Atomic floating-point in-memory instructions
 // CHECK-NEXT:     lut                 FEAT_LUT                                               Enable Lookup Table instructions
 // CHECK-NEXT:     mops                FEAT_MOPS                                              Enable Armv8.8-A memcpy and memset acceleration instructions
 // CHECK-NEXT:     memtag              FEAT_MTE, FEAT_MTE2                                    Enable Memory Tagging Extension
@@ -64,20 +69,26 @@
 // CHECK-NEXT:     sme-lutv2           FEAT_SME_LUTv2                                         Enable Scalable Matrix Extension (SME) LUTv2 instructions
 // CHECK-NEXT:     sme2                FEAT_SME2                                              Enable Scalable Matrix Extension 2 (SME2) instructions
 // CHECK-NEXT:     sme2p1              FEAT_SME2p1                                            Enable Scalable Matrix Extension 2.1 instructions
+// CHECK-NEXT:     sme2p2              FEAT_SME2p2                                            Enable Scalable Matrix Extension 2.2 instructions
 // CHECK-NEXT:     profile             FEAT_SPE                                               Enable Statistical Profiling extension
 // CHECK-NEXT:     predres2            FEAT_SPECRES2                                          Enable Speculation Restriction Instruction
 // CHECK-NEXT:     ssbs                FEAT_SSBS, FEAT_SSBS2                                  Enable Speculative Store Bypass Safe bit
+// CHECK-NEXT:     ssve-aes            FEAT_SSVE_AES                                          Enable SVE2 AES support in streaming SVE mode
 // CHECK-NEXT:     ssve-fp8dot2        FEAT_SSVE_FP8DOT2                                      Enable SVE2 FP8 2-way dot product instructions
 // CHECK-NEXT:     ssve-fp8dot4        FEAT_SSVE_FP8DOT4                                      Enable SVE2 FP8 4-way dot product instructions
 // CHECK-NEXT:     ssve-fp8fma         FEAT_SSVE_FP8FMA                                       Enable SVE2 FP8 multiply-add instructions
 // CHECK-NEXT:     sve                 FEAT_SVE                                               Enable Scalable Vector Extension (SVE) instructions
+// CHECK-NEXT:     sve-aes2            FEAT_SVE_AES2                                          Enable SVE multi-vector AES and 128-bit PMULL instructions
 // CHECK-NEXT:     sve-b16b16          FEAT_SVE_B16B16                                        Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
+// CHECK-NEXT:     sve-bfscale         FEAT_SVE_BFSCALE                                       Enable SVE BFloat16 scaling instructions
+// CHECK-NEXT:     sve-f16f32mm        FEAT_SVE_F16F32MM                                      Enable FP16 to FP32 Matrix Multiply
 // CHECK-NEXT:     sve2                FEAT_SVE2                                              Enable Scalable Vector Extension 2 (SVE2) instructions
 // CHECK-NEXT:     sve2-aes            FEAT_SVE_AES, FEAT_SVE_PMULL128                        Enable AES SVE2 instructions
 // CHECK-NEXT:     sve2-bitperm        FEAT_SVE_BitPerm                                       Enable bit permutation SVE2 instructions
 // CHECK-NEXT:     sve2-sha3           FEAT_SVE_SHA3                                          Enable SHA3 SVE2 instructions
 // CHECK-NEXT:     sve2-sm4            FEAT_SVE_SM4                                           Enable SM4 SVE2 instructions
 // CHECK-NEXT:     sve2p1              FEAT_SVE2p1                                            Enable Scalable Vector Extension 2.1 instructions
+// CHECK-NEXT:     sve2p2              FEAT_SVE2p2                                            Enable Scalable Vector Extension 2.2 instructions
 // CHECK-NEXT:     the                 FEAT_THE                                               Enable Armv8.9-A Translation Hardening Extension
 // CHECK-NEXT:     tlbiw               FEAT_TLBIW                                             Enable Armv9.5-A TLBI VMALL for Dirty State
 // CHECK-NEXT:     tme                 FEAT_TME                                               Enable Transactional Memory Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 97671bc59f6b9e..10dbf900271a32 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -522,6 +522,39 @@ def FeatureTLBIW : ExtensionWithMArch<"tlbiw", "TLBIW", "FEAT_TLBIW",
 //  Armv9.6 Architecture Extensions
 //===----------------------------------------------------------------------===//
 
+def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR",
+  "Enable A64 base compare and branch instructions">;
+
+def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM",
+  "Enable FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>;
+
+def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM",
+  "Enable FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>;
+
+def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT",
+  "Enable A64 base convert instructions for SIMD&FP scalar register operands of"
+  " different input and output sizes", [FeatureFPARMv8]>;
+
+def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE",
+  "Enable A64 base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>;
+
+def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2",
+  "Enable Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>;
+
+def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES",
+  "Enable SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>;
+
+def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2",
+  "Enable Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>;
+
+def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2",
+  "Enable SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>;
+
+def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE",
+  "Enable SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>;
+
+def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM",
+  "Enable FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>;
 
 //===----------------------------------------------------------------------===//
 //  Other Features
@@ -833,8 +866,8 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a",
   [HasV9_4aOps, FeatureCPA],
   !listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA,  FeatureLUT, FeatureFAMINMAX])>;
 def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a",
-  [HasV9_5aOps],
-  !listconcat(HasV9_5aOps.DefaultExts, [])>;
+  [HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2],
+  !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2])>;
 def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
   [ //v8.1
     FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4374d92a5b7b16..a41d5080c909fc 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -213,12 +213,35 @@ def HasSMEF8F16     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8
 def HasSMEF8F32     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
                                  AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
 
+def HasCMPBR        : Predicate<"Subtarget->hasCMPBR()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureCMPBR), "cmpbr">;
+def HasF8F32MM      : Predicate<"Subtarget->hasF8F32MM()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureF8F32MM), "f8f32mm">;
+def HasF8F16MM      : Predicate<"Subtarget->hasF8F16MM()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureF8F16MM), "f8f16mm">;
+def HasFPRCVT       : Predicate<"Subtarget->hasFPRCVT()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureFPRCVT), "fprcvt">;
+def HasLSFE         : Predicate<"Subtarget->hasLSFE()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureLSFE), "lsfe">;
+def HasSME2p2       : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSME2p2), "sme2p2">;
+def HasSVEAES2      : Predicate<"Subtarget->hasSVEAES2()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSVEAES2), "sve-aes2">;
+def HasSVEBFSCALE   : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">;
+def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
+                                 AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">;
 // A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
 // they should be enabled if either has been specified.
 def HasSVEorSME
     : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                 "sve or sme">;
+def HasSVEorSME2p2
+    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||"
+                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2),
+                "sve or sme2p2">;
 def HasSVE2orSME
     : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
@@ -227,6 +250,10 @@ def HasSVE2orSME2
     : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                 "sve2 or sme2">;
+def HasSVE2orSSVE_AES
+    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
+                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_AES), "sve2 or ssve-aes">;
 def HasSVE2p1_or_HasSME
     : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                  AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
@@ -236,7 +263,13 @@ def HasSVE2p1_or_HasSME2
 def HasSVE2p1_or_HasSME2p1
     : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
                  AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
-
+def HasSVE2p2orSME2p2
+    : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">,
+                 AssemblerPredicateWithAll<(any_of FeatureSME2p2, FeatureSVE2p2), "sme2p2 or sve2p2">;
+def HasSVE2p1orSSVE_AES
+    : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()) ||"
+                "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
+                AssemblerPredicateWithAll<(any_of FeatureSVE2p1, FeatureSSVE_AES), "sve2p1 or ssve-aes">;
 def HasSMEF16F16orSMEF8F16
     : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
                 AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 5b5d45f6c574bf..0d59b41a2f7510 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1323,8 +1323,12 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
       AArch64::AEK_CPA,          AArch64::AEK_PAUTHLR,
       AArch64::AEK_TLBIW,        AArch64::AEK_JSCVT,
       AArch64::AEK_FCMA,         AArch64::AEK_FP8,
-      AArch64::AEK_SVEB16B16,
-  };
+      AArch64::AEK_SVEB16B16,    AArch64::AEK_SVE2P2,
+      AArch64::AEK_SME2P2,       AArch64::AEK_SVE_BFSCALE,
+      AArch64::AEK_SVE_F16F32MM, AArch64::AEK_SVE_AES2,
+      AArch64::AEK_SSVE_AES,     AArch64::AEK_F8F32MM,
+      AArch64::AEK_F8F16MM,      AArch64::AEK_LSFE,
+      AArch64::AEK_FPRCVT,       AArch64::AEK_CMPBR};
 
   std::vector<StringRef> Features;
 
@@ -1356,12 +1360,16 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+ras"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sve-bfscale"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2-aes"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sm4"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sve-aes2"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+ssve-aes"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2p1"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sve2p2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+rcpc"));
   EXPECT_TRUE(llvm::is_contained(Features, "+rand"));
   EXPECT_TRUE(llvm::is_contained(Features, "+mte"));
@@ -1384,6 +1392,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+sme2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sme-b16b16"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sme2p1"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sme2p2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+hbc"));
   EXPECT_TRUE(llvm::is_contained(Features, "+mops"));
   EXPECT_TRUE(llvm::is_contained(Features, "+perfmon"));
@@ -1403,6 +1412,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot4"));
   EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot4"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+f8f32mm"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+f8f16mm"));
   EXPECT_TRUE(llvm::is_contained(Features, "+lut"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sme-lutv2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sme-f8f16"));
@@ -1413,6 +1424,9 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+tlbiw"));
   EXPECT_TRUE(llvm::is_contained(Features, "+jsconv"));
   EXPECT_TRUE(llvm::is_contained(Features, "+complxnum"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+lsfe"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+fprcvt"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+cmpbr"));
 
   // Assuming we listed every extension above, this should produce the same
   // result.
@@ -1510,12 +1524,16 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"rdm", "nordm", "+rdm", "-rdm"},
       {"sve", "nosve", "+sve", "-sve"},
       {"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"},
+      {"sve-bfscale", "nosve-bfscale", "+sve-bfscale", "-sve-bfscale"},
       {"sve2", "nosve2", "+sve2", "-sve2"},
       {"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"},
       {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"},
       {"sve2-sha3", "nosve2-sha3", "+sve2-sha3", "-sve2-sha3"},
       {"sve2p1", "nosve2p1", "+sve2p1", "-sve2p1"},
+      {"sve2p2", "nosve2p2", "+sve2p2", "-sve2p2"},
       {"sve2-bitperm", "nosve2-bitperm", "+sve2-bitperm", "-sve2-bitperm"},
+      {"sve-aes2", "nosve-aes2", "+sve-aes2", "-sve-aes2"},
+      {"ssve-aes", "nossve-aes", "+ssve-aes", "-ssve-aes"},
       {"dotprod", "nodotprod", "+dotprod", "-dotprod"},
       {"rcpc", "norcpc", "+rcpc", "-rcpc"},
       {"rng", "norng", "+rand", "-rand"},
@@ -1528,6 +1546,8 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"i8mm", "noi8mm", "+i8mm", "-i8mm"},
       {"f32mm", "nof32mm", "+f32mm", "-f32mm"},
       {"f64mm", "nof64mm", "+f64mm", "-f64mm"},
+      {"f8f32mm", "nof8f32mm", "+f8f32mm", "-f8f32mm"},
+      {"f8f16mm", "nof8f16mm", "+f8f16mm", "-f8f16mm"},
       {"sme", "nosme", "+sme", "-sme"},
       {"sme-fa64", "nosme-fa64", "+sme-fa64", "-sme-fa64"},
       {"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"},
@@ -1536,6 +1556,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"sme2", "nosme2", "+sme2", "-sme2"},
       {"sme-b16b16", "nosme-b16b16", "+sme-b16b16", "-sme-b16b16"},
       {"sme2p1", "nosme2p1", "+sme2p1", "-sme2p1"},
+      {"sme2p2", "nosme2p2", "+sme2p2", "-sme2p2"},
       {"hbc", "nohbc", "+hbc", "-hbc"},
       {"mops", "nomops", "+mops", "-mops"},
       {"pmuv3", "nopmuv3", "+perfmon", "-perfmon"},
@@ -1554,7 +1575,9 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"sme-lutv2", "nosme-lutv2", "+sme-lutv2", "-sme-lutv2"},
       {"sme-f8f16", "nosme-f8f16", "+sme-f8f16", "-sme-f8f16"},
       {"sme-f8f32", "nosme-f8f32", "+sme-f8f32", "-sme-f8f32"},
-  };
+      {"lsfe", "nolsfe", "+lsfe", "-lsfe"},
+      {"fprcvt", "nofprcvt", "+fprcvt", "-fprcvt"},
+      {"cmpbr", "nocmpbr", "+cmpbr", "-cmpbr"}};
 
   for (unsigned i = 0; i < std::size(ArchExt); i++) {
     EXPECT_EQ(StringRef(ArchExt[i][2]),
@@ -1744,6 +1767,14 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV8A, {"nofp", "jscvt"}, {"fp-armv8", "jsconv"}, {}},
         {AArch64::ARMV8A, {"jscvt", "nofp"}, {}, {"fp-armv8", "jsconv"}},
 
+        // fp -> lsfe
+        {AArch64::ARMV9_6A, {"nofp", "lsfe"}, {"fp-armv8", "lsfe"}, {}},
+        {AArch64::ARMV9_6A, {"lsfe", "nofp"}, {}, {"fp-armv8", "lsfe"}},
+
+        // fp -> fprcvt
+        {AArch64::ARMV9_6A, {"nofp", "fprcvt"}, {"fp-armv8", "fprcvt"}, {}},
+        {AArch64::ARMV9_6A, {"fprcvt", "nofp"}, {}, {"fp-armv8", "fprcvt"}},
+
         // simd -> {aes, sha2, sha3, sm4}
         {AArch64::ARMV8A, {"nosimd", "aes"}, {"neon", "aes"}, {}},
         {AArch64::ARMV8A, {"aes", "nosimd"}, {}, {"neon", "aes"}},
@@ -1780,7 +1811,7 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV8A, {"nosve", "f64mm"}, {"sve", "f64mm"}, {}},
         {AArch64::ARMV8A, {"f64mm", "nosve"}, {}, {"sve", "f64mm"}},
 
-        // sve2 -> {sve2p1, sve2-bitperm, sve2-aes, sve2-sha3, sve2-sm4}
+        // sve2 -> {sve2p1, sve2-bitperm, sve2-sha3, sve2-sm4}
         {AArch64::ARMV8A, {"nosve2", "sve2p1"}, {"sve2", "sve2p1"}, {}},
         {AArch64::ARMV8A, {"sve2p1", "nosve2"}, {}, {"sve2", "sve2p1"}},
         {AArch64::ARMV8A,
@@ -1791,22 +1822,52 @@ AArch64ExtensionDependenciesBaseArchTestParams
          {"sve2-bitperm", "nosve2"},
          {},
          {"sve2", "sve2-bitperm"}},
-        {AArch64::ARMV8A, {"nosve2", "sve2-aes"}, {"sve2", "sve2-aes"}, {}},
-        {AArch64::ARMV8A, {"sve2-aes", "nosve2"}, {}, {"sve2", "sve2-aes"}},
         {AArch64::ARMV8A, {"nosve2", "sve2-sha3"}, {"sve2", "sve2-sha3"}, {}},
         {AArch64::ARMV8A, {"sve2-sha3", "nosve2"}, {}, {"sve2", "sve2-sha3"}},
         {AArch64::ARMV8A, {"nosve2", "sve2-sm4"}, {"sve2", "sve2-sm4"}, {}},
         {AArch64::ARMV8A, {"sve2-sm4", "nosve2"}, {}, {"sve2", "sve2-sm4"}},
 
-        // sve-b16b16 -> {sme-b16b16}
+        // sve-b16b16 -> {sme-b16b16, sve-bfscale}
         {AArch64::ARMV8A,
          {"nosve-b16b16", "sme-b16b16"},
          {"sve-b16b16", "sme-b16b16"},
          {}},
-        {AArch64::ARMV8A,
+        {AArch64::ARMV9_6A,
          {"sme-b16b16", "nosve-b16b16"},
          {},
          {"sve-b16b16", "sme-b16b16"}},
+        {AArch64::ARMV9_6A,
+         {"nosve-b16b16", "sve-bfscale"},
+         {"sve-b16b16", "sve-bfscale"},
+         {}},
+        {AArch64::ARMV9_6A,
+         {"sve-bfscale", "nosve-b16b16"},
+         {},
+         {"sve-b16b16", "sve-bfscale"}},
+
+        // sve2p1 -> {sve2p2}
+        {AArch64::ARMV9_6A, {"nosve2p1", "sve2p2"}, {"sve2p1", "sve2p2"}, {}},
+        {AArch64::ARMV9_6A, {"sve2p2", "nosve2p1"}, {}, {"sve2p1", "sve2p2"}},
+
+        // sve2p1 -> sve-f16f32mm
+        {AArch64::ARMV9_6A,
+         {"nosve2p1", "sve-f16f32mm"},
+         {"sve2p1", "sve-f16f32mm"},
+         {}},
+        {AArch64::ARMV9_6A,
+         {"sve-f16f32mm", "nosve2p1"},
+         {},
+         {"sve2p1", "sve-f16f32mm"}},
+
+        // sve2-aes -> {sve-aes2}
+        {AArch64::ARMV9_6A,
+         {"nosve2-aes", "sve-aes2"},
+         {"sve2-aes", "sve-aes2"},
+         {}},
+        {AArch64::ARMV9_6A,
+         {"sve-aes2", "nosve2-aes"},
+         {},
+         {"sve2-aes", "sve-aes2"}},
 
         // sme -> {sme2, sme-f16f16, sme-f64f64, sme-i16i64, sme-fa64}
         {AArch64::ARMV8A, {"nosme", "sme2"}, {"sme", "sme2"}, {}},
@@ -1855,6 +1916,18 @@ AArch64ExtensionDependenciesBaseArchTestParams
         {AArch64::ARMV8A, {"nosme2", "sme-b16b16"}, {"sme2", "sme-b16b16"}, {}},
         {AArch64::ARMV8A, {"sme-b16b16", "nosme2"}, {}, {"sme2", "sme-b16b16"}},
 
+        // sme2p1 -> {sme2p2, ssve-aes}
+        {AArch64::ARMV9_6A, {"nosme2p1", "sme2p2"}, {"sme2p2", "sme2p1"}, {}},
+        {AArch64::ARMV9_6A, {"sme2p2", "nosme2p1"}, {}, {"sme2p1", "sme2p2"}},
+        {AArch64::ARMV9_6A,
+         {"nosme2p1", "ssve-aes"},
+         {"sme2p1", "ssve-aes"},
+         {}},
+        {AArch64::ARMV9_6A,
+         {"ssve-aes", "nosme2p1"},
+         {},
+         {"ssve-aes", "sme2p1"}},
+
         // fp8 -> {sme-f8f16, sme-f8f32}
         {AArch64::ARMV8A, {"nofp8", "sme-f8f16"}, {"fp8", "sme-f8f16"}, {}},
         {AArch64::ARMV8A, {"sme-f8f16", "nofp8"}, {}, {"fp8", "sme-f8f16"}},
@@ -1882,6 +1955,36 @@ AArch64ExtensionDependenciesBaseArchTestParams
         // rcpc -> rcpc3
         {AArch64::ARMV8A, {"norcpc", "rcpc3"}, {"rcpc", "rcpc3"}, {}},
         {AArch64::ARMV8A, {"rcpc3", "norcpc"}, {}, {"rcpc", "rcpc3"}},
+
+        // fp8dot4 -> f8f32mm
+        {AArch64::ARMV9_6A,
+         {"nofp8dot4", "f8f32mm"},
+         {"fp8dot4", "f8f32mm"},
+         {}},
+        {AArch64::ARMV9_6A,
+         {"f8f32mm", "nofp8dot4"},
+         {},
+         {"f8f32mm", "fp8dot4"}},
+
+        // f8f32mm -> f8f16mm
+        {AArch64::ARMV9_6A,
+         {"nof8f32mm", "f8f16mm"},
+         {"f8f16mm", "f8f32mm"},
+         {}},
+        {AArch64::ARMV9_6A,
+         {"f8f16mm", "nof8f32mm"},
+         {},
+         {"f8f16mm", "f8f32mm"}},
+
+        // fp8dot2 -> f8f16mm
+        {AArch64::ARMV9_6A,
+         {"nofp8dot2", "f8f16mm"},
+         {"f8f16mm", "fp8dot2"},
+         {}},
+        {AArch64::ARMV9_6A,
+         {"f8f16mm", "nofp8dot2"},
+         {},
+         {"f8f16mm", "fp8dot2"}},
 };
 
 INSTANTIATE_TEST_SUITE_P(

>From 1bd148cc61382543b49ec09ca370e7d17309cd1d Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Fri, 11 Oct 2024 12:52:36 +0000
Subject: [PATCH 2/2] Add Armv9.6-A to feature descriptions and add missing
 tests for sve-f16f32mm

---
 llvm/lib/Target/AArch64/AArch64Features.td    | 22 +++++++++----------
 .../TargetParser/TargetParserTest.cpp         |  5 +++--
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 10dbf900271a32..0f45cf481a8ada 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -523,38 +523,38 @@ def FeatureTLBIW : ExtensionWithMArch<"tlbiw", "TLBIW", "FEAT_TLBIW",
 //===----------------------------------------------------------------------===//
 
 def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR",
-  "Enable A64 base compare and branch instructions">;
+  "Enable Armv9.6-A base compare and branch instructions">;
 
 def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM",
-  "Enable FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>;
+  "Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>;
 
 def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM",
-  "Enable FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>;
+  "Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>;
 
 def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT",
-  "Enable A64 base convert instructions for SIMD&FP scalar register operands of"
+  "Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of"
   " different input and output sizes", [FeatureFPARMv8]>;
 
 def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE",
-  "Enable A64 base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>;
+  "Enable Armv9.6-A base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>;
 
 def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2",
-  "Enable Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>;
+  "Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>;
 
 def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES",
-  "Enable SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>;
+  "Enable Armv9.6-A SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>;
 
 def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2",
-  "Enable Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>;
+  "Enable Armv9.6-A Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>;
 
 def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2",
-  "Enable SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>;
+  "Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>;
 
 def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE",
-  "Enable SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>;
+  "Enable Armv9.6-A SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>;
 
 def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM",
-  "Enable FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>;
+  "Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>;
 
 //===----------------------------------------------------------------------===//
 //  Other Features
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 0d59b41a2f7510..31fed84cc0a817 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1361,6 +1361,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(llvm::is_contained(Features, "+sve"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve-bfscale"));
+  EXPECT_TRUE(llvm::is_contained(Features, "+sve-f16f32mm"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2-aes"));
   EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sm4"));
@@ -1525,6 +1526,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"sve", "nosve", "+sve", "-sve"},
       {"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"},
       {"sve-bfscale", "nosve-bfscale", "+sve-bfscale", "-sve-bfscale"},
+      {"sve-f16f32mm", "nosve-f16f32mm", "+sve-f16f32mm", "-sve-f16f32mm"},
       {"sve2", "nosve2", "+sve2", "-sve2"},
       {"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"},
       {"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"},
@@ -1845,11 +1847,10 @@ AArch64ExtensionDependenciesBaseArchTestParams
          {},
          {"sve-b16b16", "sve-bfscale"}},
 
-        // sve2p1 -> {sve2p2}
+        // sve2p1 -> {sve2p2, sve-f16f32mm}
         {AArch64::ARMV9_6A, {"nosve2p1", "sve2p2"}, {"sve2p1", "sve2p2"}, {}},
         {AArch64::ARMV9_6A, {"sve2p2", "nosve2p1"}, {}, {"sve2p1", "sve2p2"}},
 
-        // sve2p1 -> sve-f16f32mm
         {AArch64::ARMV9_6A,
          {"nosve2p1", "sve-f16f32mm"},
          {"sve2p1", "sve-f16f32mm"},