[clang] [llvm] [AArch64] Introduce new armv9.6 features (PR #111677)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 16 10:20:06 PDT 2024
https://github.com/SpencerAbson updated https://github.com/llvm/llvm-project/pull/111677
>From f02f798702ec74d7168f5f95446a99ad2bc0e2e3 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Wed, 9 Oct 2024 10:52:42 +0000
Subject: [PATCH 1/4] [AArch64] Introduce new armv9.6 features
---
.../print-supported-extensions-aarch64.c | 11 ++
llvm/lib/Target/AArch64/AArch64Features.td | 37 +++++-
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 35 +++++-
.../TargetParser/TargetParserTest.cpp | 119 ++++++++++++++++--
4 files changed, 191 insertions(+), 11 deletions(-)
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index e6247307c7219f..c582154bd0370d 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -8,6 +8,7 @@
// CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension
// CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension
// CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification
+// CHECK-NEXT: cmpbr FEAT_CMPBR Enable A64 base compare and branch instructions
// CHECK-NEXT: fcma FEAT_FCMA Enable Armv8.3-A Floating-point complex number support
// CHECK-NEXT: cpa FEAT_CPA Enable Armv9.5-A Checked Pointer Arithmetic
// CHECK-NEXT: crc FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions
@@ -18,6 +19,8 @@
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
+// CHECK-NEXT: f8f16mm FEAT_F8F16MM Enable FP8 to Half-Precision Matrix Multiplication
+// CHECK-NEXT: f8f32mm FEAT_F8F32MM Enable FP8 to Single-Precision Matrix Multiplication
// CHECK-NEXT: faminmax FEAT_FAMINMAX Enable FAMIN and FAMAX instructions
// CHECK-NEXT: flagm FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions
// CHECK-NEXT: fp FEAT_FP Enable Armv8.0-A Floating Point Extensions
@@ -26,6 +29,7 @@
// CHECK-NEXT: fp8dot2 FEAT_FP8DOT2 Enable FP8 2-way dot instructions
// CHECK-NEXT: fp8dot4 FEAT_FP8DOT4 Enable FP8 4-way dot instructions
// CHECK-NEXT: fp8fma FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions
+// CHECK-NEXT: fprcvt FEAT_FPRCVT Enable A64 base convert instructions for SIMD&FP scalar register operands of different input and output sizes
// CHECK-NEXT: fp16 FEAT_FP16 Enable half-precision floating-point data processing
// CHECK-NEXT: gcs FEAT_GCS Enable Armv9.4-A Guarded Call Stack Extension
// CHECK-NEXT: hbc FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension
@@ -35,6 +39,7 @@
// CHECK-NEXT: ls64 FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension
// CHECK-NEXT: lse FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions
// CHECK-NEXT: lse128 FEAT_LSE128 Enable Armv9.4-A 128-bit Atomic instructions
+// CHECK-NEXT: lsfe FEAT_LSFE Enable A64 base Atomic floating-point in-memory instructions
// CHECK-NEXT: lut FEAT_LUT Enable Lookup Table instructions
// CHECK-NEXT: mops FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions
// CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension
@@ -64,20 +69,26 @@
// CHECK-NEXT: sme-lutv2 FEAT_SME_LUTv2 Enable Scalable Matrix Extension (SME) LUTv2 instructions
// CHECK-NEXT: sme2 FEAT_SME2 Enable Scalable Matrix Extension 2 (SME2) instructions
// CHECK-NEXT: sme2p1 FEAT_SME2p1 Enable Scalable Matrix Extension 2.1 instructions
+// CHECK-NEXT: sme2p2 FEAT_SME2p2 Enable Scalable Matrix Extension 2.2 instructions
// CHECK-NEXT: profile FEAT_SPE Enable Statistical Profiling extension
// CHECK-NEXT: predres2 FEAT_SPECRES2 Enable Speculation Restriction Instruction
// CHECK-NEXT: ssbs FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
+// CHECK-NEXT: ssve-aes FEAT_SSVE_AES Enable SVE2 AES support in streaming SVE mode
// CHECK-NEXT: ssve-fp8dot2 FEAT_SSVE_FP8DOT2 Enable SVE2 FP8 2-way dot product instructions
// CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions
// CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions
// CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
+// CHECK-NEXT: sve-aes2 FEAT_SVE_AES2 Enable SVE multi-vector AES and 128-bit PMULL instructions
// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
+// CHECK-NEXT: sve-bfscale FEAT_SVE_BFSCALE Enable SVE BFloat16 scaling instructions
+// CHECK-NEXT: sve-f16f32mm FEAT_SVE_F16F32MM Enable FP16 to FP32 Matrix Multiply
// CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
// CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions
// CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
// CHECK-NEXT: sve2-sha3 FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions
// CHECK-NEXT: sve2-sm4 FEAT_SVE_SM4 Enable SM4 SVE2 instructions
// CHECK-NEXT: sve2p1 FEAT_SVE2p1 Enable Scalable Vector Extension 2.1 instructions
+// CHECK-NEXT: sve2p2 FEAT_SVE2p2 Enable Scalable Vector Extension 2.2 instructions
// CHECK-NEXT: the FEAT_THE Enable Armv8.9-A Translation Hardening Extension
// CHECK-NEXT: tlbiw FEAT_TLBIW Enable Armv9.5-A TLBI VMALL for Dirty State
// CHECK-NEXT: tme FEAT_TME Enable Transactional Memory Extension
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 97671bc59f6b9e..10dbf900271a32 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -522,6 +522,39 @@ def FeatureTLBIW : ExtensionWithMArch<"tlbiw", "TLBIW", "FEAT_TLBIW",
// Armv9.6 Architecture Extensions
//===----------------------------------------------------------------------===//
+def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR",
+ "Enable A64 base compare and branch instructions">;
+
+def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM",
+ "Enable FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>;
+
+def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM",
+ "Enable FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>;
+
+def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT",
+ "Enable A64 base convert instructions for SIMD&FP scalar register operands of"
+ " different input and output sizes", [FeatureFPARMv8]>;
+
+def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE",
+ "Enable A64 base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>;
+
+def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2",
+ "Enable Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>;
+
+def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES",
+ "Enable SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>;
+
+def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2",
+ "Enable Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>;
+
+def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2",
+ "Enable SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>;
+
+def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE",
+ "Enable SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>;
+
+def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM",
+ "Enable FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>;
//===----------------------------------------------------------------------===//
// Other Features
@@ -833,8 +866,8 @@ def HasV9_5aOps : Architecture64<9, 5, "a", "v9.5a",
[HasV9_4aOps, FeatureCPA],
!listconcat(HasV9_4aOps.DefaultExts, [FeatureCPA, FeatureLUT, FeatureFAMINMAX])>;
def HasV9_6aOps : Architecture64<9, 6, "a", "v9.6a",
- [HasV9_5aOps],
- !listconcat(HasV9_5aOps.DefaultExts, [])>;
+ [HasV9_5aOps, FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2],
+ !listconcat(HasV9_5aOps.DefaultExts, [FeatureCMPBR, FeatureFPRCVT, FeatureSVE2p2])>;
def HasV8_0rOps : Architecture64<8, 0, "r", "v8r",
[ //v8.1
FeatureCRC, FeaturePAN, FeatureLSE, FeatureCONTEXTIDREL2,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 32f2c7c71d1757..6c9f0986b9e349 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -213,12 +213,35 @@ def HasSMEF8F16 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8
def HasSMEF8F32 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
+def HasCMPBR : Predicate<"Subtarget->hasCMPBR()">,
+ AssemblerPredicateWithAll<(all_of FeatureCMPBR), "cmpbr">;
+def HasF8F32MM : Predicate<"Subtarget->hasF8F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF8F32MM), "f8f32mm">;
+def HasF8F16MM : Predicate<"Subtarget->hasF8F16MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureF8F16MM), "f8f16mm">;
+def HasFPRCVT : Predicate<"Subtarget->hasFPRCVT()">,
+ AssemblerPredicateWithAll<(all_of FeatureFPRCVT), "fprcvt">;
+def HasLSFE : Predicate<"Subtarget->hasLSFE()">,
+ AssemblerPredicateWithAll<(all_of FeatureLSFE), "lsfe">;
+def HasSME2p2 : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p2()">,
+ AssemblerPredicateWithAll<(all_of FeatureSME2p2), "sme2p2">;
+def HasSVEAES2 : Predicate<"Subtarget->hasSVEAES2()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVEAES2), "sve-aes2">;
+def HasSVEBFSCALE : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSVEBFSCALE()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVEBFSCALE), "sve-bfscale">;
+def HasSVE_F16F32MM : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE_F16F32MM()">,
+ AssemblerPredicateWithAll<(all_of FeatureSVE_F16F32MM), "sve-f16f32mm">;
// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// they should be enabled if either has been specified.
def HasSVEorSME
: Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
"sve or sme">;
+def HasSVEorSME2p2
+ : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE()) ||"
+ "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSME2p2())">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME2p2),
+ "sve or sme2p2">;
def HasSVE2orSME
: Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
@@ -227,6 +250,10 @@ def HasSVE2orSME2
: Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
"sve2 or sme2">;
+def HasSVE2orSSVE_AES
+ : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2()) ||"
+ "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSSVE_AES), "sve2 or ssve-aes">;
def HasSVE2p1_or_HasSME
: Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
@@ -236,7 +263,13 @@ def HasSVE2p1_or_HasSME2
def HasSVE2p1_or_HasSME2p1
: Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;
-
+def HasSVE2p2orSME2p2
+ : Predicate<"Subtarget->isSVEorStreamingSVEAvailable() && (Subtarget->hasSVE2p2() || Subtarget->hasSME2p2())">,
+ AssemblerPredicateWithAll<(any_of FeatureSME2p2, FeatureSVE2p2), "sme2p2 or sve2p2">;
+def HasSVE2p1orSSVE_AES
+ : Predicate<"(Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()) ||"
+ "(Subtarget->isSVEorStreamingSVEAvailable() && Subtarget->hasSSVE_AES())">,
+ AssemblerPredicateWithAll<(any_of FeatureSVE2p1, FeatureSSVE_AES), "sve2p1 or ssve-aes">;
def HasSMEF16F16orSMEF8F16
: Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 87b78d502780d1..a9869f677c52d3 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1326,8 +1326,12 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
AArch64::AEK_CPA, AArch64::AEK_PAUTHLR,
AArch64::AEK_TLBIW, AArch64::AEK_JSCVT,
AArch64::AEK_FCMA, AArch64::AEK_FP8,
- AArch64::AEK_SVEB16B16,
- };
+ AArch64::AEK_SVEB16B16, AArch64::AEK_SVE2P2,
+ AArch64::AEK_SME2P2, AArch64::AEK_SVE_BFSCALE,
+ AArch64::AEK_SVE_F16F32MM, AArch64::AEK_SVE_AES2,
+ AArch64::AEK_SSVE_AES, AArch64::AEK_F8F32MM,
+ AArch64::AEK_F8F16MM, AArch64::AEK_LSFE,
+ AArch64::AEK_FPRCVT, AArch64::AEK_CMPBR};
std::vector<StringRef> Features;
@@ -1359,12 +1363,16 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+ras"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+sve-bfscale"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-aes"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sm4"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sha3"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-bitperm"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+sve-aes2"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+ssve-aes"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2p1"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+sve2p2"));
EXPECT_TRUE(llvm::is_contained(Features, "+rcpc"));
EXPECT_TRUE(llvm::is_contained(Features, "+rand"));
EXPECT_TRUE(llvm::is_contained(Features, "+mte"));
@@ -1387,6 +1395,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+sme2"));
EXPECT_TRUE(llvm::is_contained(Features, "+sme-b16b16"));
EXPECT_TRUE(llvm::is_contained(Features, "+sme2p1"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+sme2p2"));
EXPECT_TRUE(llvm::is_contained(Features, "+hbc"));
EXPECT_TRUE(llvm::is_contained(Features, "+mops"));
EXPECT_TRUE(llvm::is_contained(Features, "+perfmon"));
@@ -1406,6 +1415,8 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot2"));
EXPECT_TRUE(llvm::is_contained(Features, "+fp8dot4"));
EXPECT_TRUE(llvm::is_contained(Features, "+ssve-fp8dot4"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+f8f32mm"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+f8f16mm"));
EXPECT_TRUE(llvm::is_contained(Features, "+lut"));
EXPECT_TRUE(llvm::is_contained(Features, "+sme-lutv2"));
EXPECT_TRUE(llvm::is_contained(Features, "+sme-f8f16"));
@@ -1416,6 +1427,9 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+tlbiw"));
EXPECT_TRUE(llvm::is_contained(Features, "+jsconv"));
EXPECT_TRUE(llvm::is_contained(Features, "+complxnum"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+lsfe"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+fprcvt"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+cmpbr"));
// Assuming we listed every extension above, this should produce the same
// result.
@@ -1513,12 +1527,16 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"rdm", "nordm", "+rdm", "-rdm"},
{"sve", "nosve", "+sve", "-sve"},
{"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"},
+ {"sve-bfscale", "nosve-bfscale", "+sve-bfscale", "-sve-bfscale"},
{"sve2", "nosve2", "+sve2", "-sve2"},
{"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"},
{"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"},
{"sve2-sha3", "nosve2-sha3", "+sve2-sha3", "-sve2-sha3"},
{"sve2p1", "nosve2p1", "+sve2p1", "-sve2p1"},
+ {"sve2p2", "nosve2p2", "+sve2p2", "-sve2p2"},
{"sve2-bitperm", "nosve2-bitperm", "+sve2-bitperm", "-sve2-bitperm"},
+ {"sve-aes2", "nosve-aes2", "+sve-aes2", "-sve-aes2"},
+ {"ssve-aes", "nossve-aes", "+ssve-aes", "-ssve-aes"},
{"dotprod", "nodotprod", "+dotprod", "-dotprod"},
{"rcpc", "norcpc", "+rcpc", "-rcpc"},
{"rng", "norng", "+rand", "-rand"},
@@ -1531,6 +1549,8 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"i8mm", "noi8mm", "+i8mm", "-i8mm"},
{"f32mm", "nof32mm", "+f32mm", "-f32mm"},
{"f64mm", "nof64mm", "+f64mm", "-f64mm"},
+ {"f8f32mm", "nof8f32mm", "+f8f32mm", "-f8f32mm"},
+ {"f8f16mm", "nof8f16mm", "+f8f16mm", "-f8f16mm"},
{"sme", "nosme", "+sme", "-sme"},
{"sme-fa64", "nosme-fa64", "+sme-fa64", "-sme-fa64"},
{"sme-f64f64", "nosme-f64f64", "+sme-f64f64", "-sme-f64f64"},
@@ -1539,6 +1559,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"sme2", "nosme2", "+sme2", "-sme2"},
{"sme-b16b16", "nosme-b16b16", "+sme-b16b16", "-sme-b16b16"},
{"sme2p1", "nosme2p1", "+sme2p1", "-sme2p1"},
+ {"sme2p2", "nosme2p2", "+sme2p2", "-sme2p2"},
{"hbc", "nohbc", "+hbc", "-hbc"},
{"mops", "nomops", "+mops", "-mops"},
{"pmuv3", "nopmuv3", "+perfmon", "-perfmon"},
@@ -1557,7 +1578,9 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"sme-lutv2", "nosme-lutv2", "+sme-lutv2", "-sme-lutv2"},
{"sme-f8f16", "nosme-f8f16", "+sme-f8f16", "-sme-f8f16"},
{"sme-f8f32", "nosme-f8f32", "+sme-f8f32", "-sme-f8f32"},
- };
+ {"lsfe", "nolsfe", "+lsfe", "-lsfe"},
+ {"fprcvt", "nofprcvt", "+fprcvt", "-fprcvt"},
+ {"cmpbr", "nocmpbr", "+cmpbr", "-cmpbr"}};
for (unsigned i = 0; i < std::size(ArchExt); i++) {
EXPECT_EQ(StringRef(ArchExt[i][2]),
@@ -1747,6 +1770,14 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"nofp", "jscvt"}, {"fp-armv8", "jsconv"}, {}},
{AArch64::ARMV8A, {"jscvt", "nofp"}, {}, {"fp-armv8", "jsconv"}},
+ // fp -> lsfe
+ {AArch64::ARMV9_6A, {"nofp", "lsfe"}, {"fp-armv8", "lsfe"}, {}},
+ {AArch64::ARMV9_6A, {"lsfe", "nofp"}, {}, {"fp-armv8", "lsfe"}},
+
+ // fp -> fprcvt
+ {AArch64::ARMV9_6A, {"nofp", "fprcvt"}, {"fp-armv8", "fprcvt"}, {}},
+ {AArch64::ARMV9_6A, {"fprcvt", "nofp"}, {}, {"fp-armv8", "fprcvt"}},
+
// simd -> {aes, sha2, sha3, sm4}
{AArch64::ARMV8A, {"nosimd", "aes"}, {"neon", "aes"}, {}},
{AArch64::ARMV8A, {"aes", "nosimd"}, {}, {"neon", "aes"}},
@@ -1783,7 +1814,7 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"nosve", "f64mm"}, {"sve", "f64mm"}, {}},
{AArch64::ARMV8A, {"f64mm", "nosve"}, {}, {"sve", "f64mm"}},
- // sve2 -> {sve2p1, sve2-bitperm, sve2-aes, sve2-sha3, sve2-sm4}
+ // sve2 -> {sve2p1, sve2-bitperm, sve2-sha3, sve2-sm4}
{AArch64::ARMV8A, {"nosve2", "sve2p1"}, {"sve2", "sve2p1"}, {}},
{AArch64::ARMV8A, {"sve2p1", "nosve2"}, {}, {"sve2", "sve2p1"}},
{AArch64::ARMV8A,
@@ -1794,22 +1825,52 @@ AArch64ExtensionDependenciesBaseArchTestParams
{"sve2-bitperm", "nosve2"},
{},
{"sve2", "sve2-bitperm"}},
- {AArch64::ARMV8A, {"nosve2", "sve2-aes"}, {"sve2", "sve2-aes"}, {}},
- {AArch64::ARMV8A, {"sve2-aes", "nosve2"}, {}, {"sve2", "sve2-aes"}},
{AArch64::ARMV8A, {"nosve2", "sve2-sha3"}, {"sve2", "sve2-sha3"}, {}},
{AArch64::ARMV8A, {"sve2-sha3", "nosve2"}, {}, {"sve2", "sve2-sha3"}},
{AArch64::ARMV8A, {"nosve2", "sve2-sm4"}, {"sve2", "sve2-sm4"}, {}},
{AArch64::ARMV8A, {"sve2-sm4", "nosve2"}, {}, {"sve2", "sve2-sm4"}},
- // sve-b16b16 -> {sme-b16b16}
+ // sve-b16b16 -> {sme-b16b16, sve-bfscale}
{AArch64::ARMV8A,
{"nosve-b16b16", "sme-b16b16"},
{"sve-b16b16", "sme-b16b16"},
{}},
- {AArch64::ARMV8A,
+ {AArch64::ARMV9_6A,
{"sme-b16b16", "nosve-b16b16"},
{},
{"sve-b16b16", "sme-b16b16"}},
+ {AArch64::ARMV9_6A,
+ {"nosve-b16b16", "sve-bfscale"},
+ {"sve-b16b16", "sve-bfscale"},
+ {}},
+ {AArch64::ARMV9_6A,
+ {"sve-bfscale", "nosve-b16b16"},
+ {},
+ {"sve-b16b16", "sve-bfscale"}},
+
+ // sve2p1 -> {sve2p2}
+ {AArch64::ARMV9_6A, {"nosve2p1", "sve2p2"}, {"sve2p1", "sve2p2"}, {}},
+ {AArch64::ARMV9_6A, {"sve2p2", "nosve2p1"}, {}, {"sve2p1", "sve2p2"}},
+
+ // sve2p1 -> sve-f16f32mm
+ {AArch64::ARMV9_6A,
+ {"nosve2p1", "sve-f16f32mm"},
+ {"sve2p1", "sve-f16f32mm"},
+ {}},
+ {AArch64::ARMV9_6A,
+ {"sve-f16f32mm", "nosve2p1"},
+ {},
+ {"sve2p1", "sve-f16f32mm"}},
+
+ // sve2-aes -> {sve-aes2}
+ {AArch64::ARMV9_6A,
+ {"nosve2-aes", "sve-aes2"},
+ {"sve2-aes", "sve-aes2"},
+ {}},
+ {AArch64::ARMV9_6A,
+ {"sve-aes2", "nosve2-aes"},
+ {},
+ {"sve2-aes", "sve-aes2"}},
// sme -> {sme2, sme-f16f16, sme-f64f64, sme-i16i64, sme-fa64}
{AArch64::ARMV8A, {"nosme", "sme2"}, {"sme", "sme2"}, {}},
@@ -1858,6 +1919,18 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"nosme2", "sme-b16b16"}, {"sme2", "sme-b16b16"}, {}},
{AArch64::ARMV8A, {"sme-b16b16", "nosme2"}, {}, {"sme2", "sme-b16b16"}},
+ // sme2p1 -> {sme2p2, ssve-aes}
+ {AArch64::ARMV9_6A, {"nosme2p1", "sme2p2"}, {"sme2p2", "sme2p1"}, {}},
+ {AArch64::ARMV9_6A, {"sme2p2", "nosme2p1"}, {}, {"sme2p1", "sme2p2"}},
+ {AArch64::ARMV9_6A,
+ {"nosme2p1", "ssve-aes"},
+ {"sme2p1", "ssve-aes"},
+ {}},
+ {AArch64::ARMV9_6A,
+ {"ssve-aes", "nosme2p1"},
+ {},
+ {"ssve-aes", "sme2p1"}},
+
// fp8 -> {sme-f8f16, sme-f8f32}
{AArch64::ARMV8A, {"nofp8", "sme-f8f16"}, {"fp8", "sme-f8f16"}, {}},
{AArch64::ARMV8A, {"sme-f8f16", "nofp8"}, {}, {"fp8", "sme-f8f16"}},
@@ -1885,6 +1958,36 @@ AArch64ExtensionDependenciesBaseArchTestParams
// rcpc -> rcpc3
{AArch64::ARMV8A, {"norcpc", "rcpc3"}, {"rcpc", "rcpc3"}, {}},
{AArch64::ARMV8A, {"rcpc3", "norcpc"}, {}, {"rcpc", "rcpc3"}},
+
+ // fp8dot4 -> f8f32mm
+ {AArch64::ARMV9_6A,
+ {"nofp8dot4", "f8f32mm"},
+ {"fp8dot4", "f8f32mm"},
+ {}},
+ {AArch64::ARMV9_6A,
+ {"f8f32mm", "nofp8dot4"},
+ {},
+ {"f8f32mm", "fp8dot4"}},
+
+ // f8f32mm -> f8f16mm
+ {AArch64::ARMV9_6A,
+ {"nof8f32mm", "f8f16mm"},
+ {"f8f16mm", "f8f32mm"},
+ {}},
+ {AArch64::ARMV9_6A,
+ {"f8f16mm", "nof8f32mm"},
+ {},
+ {"f8f16mm", "f8f32mm"}},
+
+ // fp8dot2 -> f8f16mm
+ {AArch64::ARMV9_6A,
+ {"nofp8dot2", "f8f16mm"},
+ {"f8f16mm", "fp8dot2"},
+ {}},
+ {AArch64::ARMV9_6A,
+ {"f8f16mm", "nofp8dot2"},
+ {},
+ {"f8f16mm", "fp8dot2"}},
};
INSTANTIATE_TEST_SUITE_P(
>From 77154d6345846dc95b4f566c0b9c17d893233883 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Fri, 11 Oct 2024 12:52:36 +0000
Subject: [PATCH 2/4] Add Armv9.6-A to feature descriptions and add missing
tests for sve-f16f32mm
---
llvm/lib/Target/AArch64/AArch64Features.td | 22 +++++++++----------
.../TargetParser/TargetParserTest.cpp | 5 +++--
2 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 10dbf900271a32..0f45cf481a8ada 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -523,38 +523,38 @@ def FeatureTLBIW : ExtensionWithMArch<"tlbiw", "TLBIW", "FEAT_TLBIW",
//===----------------------------------------------------------------------===//
def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR",
- "Enable A64 base compare and branch instructions">;
+ "Enable Armv9.6-A base compare and branch instructions">;
def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM",
- "Enable FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>;
+ "Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>;
def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM",
- "Enable FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>;
+ "Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>;
def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT",
- "Enable A64 base convert instructions for SIMD&FP scalar register operands of"
+ "Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of"
" different input and output sizes", [FeatureFPARMv8]>;
def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE",
- "Enable A64 base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>;
+ "Enable Armv9.6-A base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>;
def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2",
- "Enable Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>;
+ "Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>;
def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES",
- "Enable SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>;
+ "Enable Armv9.6-A SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>;
def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2",
- "Enable Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>;
+ "Enable Armv9.6-A Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>;
def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2",
- "Enable SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>;
+ "Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>;
def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE",
- "Enable SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>;
+ "Enable Armv9.6-A SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>;
def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM",
- "Enable FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>;
+ "Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>;
//===----------------------------------------------------------------------===//
// Other Features
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index a9869f677c52d3..87f4ed970165f8 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1364,6 +1364,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
EXPECT_TRUE(llvm::is_contained(Features, "+sve"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve-b16b16"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve-bfscale"));
+ EXPECT_TRUE(llvm::is_contained(Features, "+sve-f16f32mm"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-aes"));
EXPECT_TRUE(llvm::is_contained(Features, "+sve2-sm4"));
@@ -1528,6 +1529,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
{"sve", "nosve", "+sve", "-sve"},
{"sve-b16b16", "nosve-b16b16", "+sve-b16b16", "-sve-b16b16"},
{"sve-bfscale", "nosve-bfscale", "+sve-bfscale", "-sve-bfscale"},
+ {"sve-f16f32mm", "nosve-f16f32mm", "+sve-f16f32mm", "-sve-f16f32mm"},
{"sve2", "nosve2", "+sve2", "-sve2"},
{"sve2-aes", "nosve2-aes", "+sve2-aes", "-sve2-aes"},
{"sve2-sm4", "nosve2-sm4", "+sve2-sm4", "-sve2-sm4"},
@@ -1848,11 +1850,10 @@ AArch64ExtensionDependenciesBaseArchTestParams
{},
{"sve-b16b16", "sve-bfscale"}},
- // sve2p1 -> {sve2p2}
+ // sve2p1 -> {sve2p2, sve-f16f32mm}
{AArch64::ARMV9_6A, {"nosve2p1", "sve2p2"}, {"sve2p1", "sve2p2"}, {}},
{AArch64::ARMV9_6A, {"sve2p2", "nosve2p1"}, {}, {"sve2p1", "sve2p2"}},
- // sve2p1 -> sve-f16f32mm
{AArch64::ARMV9_6A,
{"nosve2p1", "sve-f16f32mm"},
{"sve2p1", "sve-f16f32mm"},
>From af18caa2f50583b9c4e0ac50de550ec265fed5c9 Mon Sep 17 00:00:00 2001
From: Marian Lukac <Marian.Lukac at arm.com>
Date: Fri, 11 Oct 2024 15:23:59 +0000
Subject: [PATCH 3/4] Fix test
---
.../print-supported-extensions-aarch64.c | 22 +++++++++----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index c582154bd0370d..fbc0d70c4901c9 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -8,7 +8,7 @@
// CHECK-NEXT: bf16 FEAT_BF16 Enable BFloat16 Extension
// CHECK-NEXT: brbe FEAT_BRBE Enable Branch Record Buffer Extension
// CHECK-NEXT: bti FEAT_BTI Enable Branch Target Identification
-// CHECK-NEXT: cmpbr FEAT_CMPBR Enable A64 base compare and branch instructions
+// CHECK-NEXT: cmpbr FEAT_CMPBR Enable Armv9.6-A base compare and branch instructions
// CHECK-NEXT: fcma FEAT_FCMA Enable Armv8.3-A Floating-point complex number support
// CHECK-NEXT: cpa FEAT_CPA Enable Armv9.5-A Checked Pointer Arithmetic
// CHECK-NEXT: crc FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions
@@ -19,8 +19,8 @@
// CHECK-NEXT: dotprod FEAT_DotProd Enable dot product support
// CHECK-NEXT: f32mm FEAT_F32MM Enable Matrix Multiply FP32 Extension
// CHECK-NEXT: f64mm FEAT_F64MM Enable Matrix Multiply FP64 Extension
-// CHECK-NEXT: f8f16mm FEAT_F8F16MM Enable FP8 to Half-Precision Matrix Multiplication
-// CHECK-NEXT: f8f32mm FEAT_F8F32MM Enable FP8 to Single-Precision Matrix Multiplication
+// CHECK-NEXT: f8f16mm FEAT_F8F16MM Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication
+// CHECK-NEXT: f8f32mm FEAT_F8F32MM Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication
// CHECK-NEXT: faminmax FEAT_FAMINMAX Enable FAMIN and FAMAX instructions
// CHECK-NEXT: flagm FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions
// CHECK-NEXT: fp FEAT_FP Enable Armv8.0-A Floating Point Extensions
@@ -29,7 +29,7 @@
// CHECK-NEXT: fp8dot2 FEAT_FP8DOT2 Enable FP8 2-way dot instructions
// CHECK-NEXT: fp8dot4 FEAT_FP8DOT4 Enable FP8 4-way dot instructions
// CHECK-NEXT: fp8fma FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions
-// CHECK-NEXT: fprcvt FEAT_FPRCVT Enable A64 base convert instructions for SIMD&FP scalar register operands of different input and output sizes
+// CHECK-NEXT: fprcvt FEAT_FPRCVT Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of different input and output sizes
// CHECK-NEXT: fp16 FEAT_FP16 Enable half-precision floating-point data processing
// CHECK-NEXT: gcs FEAT_GCS Enable Armv9.4-A Guarded Call Stack Extension
// CHECK-NEXT: hbc FEAT_HBC Enable Armv8.8-A Hinted Conditional Branches Extension
@@ -39,7 +39,7 @@
// CHECK-NEXT: ls64 FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension
// CHECK-NEXT: lse FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions
// CHECK-NEXT: lse128 FEAT_LSE128 Enable Armv9.4-A 128-bit Atomic instructions
-// CHECK-NEXT: lsfe FEAT_LSFE Enable A64 base Atomic floating-point in-memory instructions
+// CHECK-NEXT: lsfe FEAT_LSFE Enable Armv9.6-A base Atomic floating-point in-memory instructions
// CHECK-NEXT: lut FEAT_LUT Enable Lookup Table instructions
// CHECK-NEXT: mops FEAT_MOPS Enable Armv8.8-A memcpy and memset acceleration instructions
// CHECK-NEXT: memtag FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension
@@ -69,26 +69,26 @@
// CHECK-NEXT: sme-lutv2 FEAT_SME_LUTv2 Enable Scalable Matrix Extension (SME) LUTv2 instructions
// CHECK-NEXT: sme2 FEAT_SME2 Enable Scalable Matrix Extension 2 (SME2) instructions
// CHECK-NEXT: sme2p1 FEAT_SME2p1 Enable Scalable Matrix Extension 2.1 instructions
-// CHECK-NEXT: sme2p2 FEAT_SME2p2 Enable Scalable Matrix Extension 2.2 instructions
+// CHECK-NEXT: sme2p2 FEAT_SME2p2 Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions
// CHECK-NEXT: profile FEAT_SPE Enable Statistical Profiling extension
// CHECK-NEXT: predres2 FEAT_SPECRES2 Enable Speculation Restriction Instruction
// CHECK-NEXT: ssbs FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
-// CHECK-NEXT: ssve-aes FEAT_SSVE_AES Enable SVE2 AES support in streaming SVE mode
+// CHECK-NEXT: ssve-aes FEAT_SSVE_AES Enable Armv9.6-A SVE2 AES support in streaming SVE mode
// CHECK-NEXT: ssve-fp8dot2 FEAT_SSVE_FP8DOT2 Enable SVE2 FP8 2-way dot product instructions
// CHECK-NEXT: ssve-fp8dot4 FEAT_SSVE_FP8DOT4 Enable SVE2 FP8 4-way dot product instructions
// CHECK-NEXT: ssve-fp8fma FEAT_SSVE_FP8FMA Enable SVE2 FP8 multiply-add instructions
// CHECK-NEXT: sve FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
-// CHECK-NEXT: sve-aes2 FEAT_SVE_AES2 Enable SVE multi-vector AES and 128-bit PMULL instructions
+// CHECK-NEXT: sve-aes2 FEAT_SVE_AES2 Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions
// CHECK-NEXT: sve-b16b16 FEAT_SVE_B16B16 Enable SVE2 non-widening and SME2 Z-targeting non-widening BFloat16 instructions
-// CHECK-NEXT: sve-bfscale FEAT_SVE_BFSCALE Enable SVE BFloat16 scaling instructions
-// CHECK-NEXT: sve-f16f32mm FEAT_SVE_F16F32MM Enable FP16 to FP32 Matrix Multiply
+// CHECK-NEXT: sve-bfscale FEAT_SVE_BFSCALE Enable Armv9.6-A SVE BFloat16 scaling instructions
+// CHECK-NEXT: sve-f16f32mm FEAT_SVE_F16F32MM Enable Armv9.6-A FP16 to FP32 Matrix Multiply
// CHECK-NEXT: sve2 FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
// CHECK-NEXT: sve2-aes FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable AES SVE2 instructions
// CHECK-NEXT: sve2-bitperm FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
// CHECK-NEXT: sve2-sha3 FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions
// CHECK-NEXT: sve2-sm4 FEAT_SVE_SM4 Enable SM4 SVE2 instructions
// CHECK-NEXT: sve2p1 FEAT_SVE2p1 Enable Scalable Vector Extension 2.1 instructions
-// CHECK-NEXT: sve2p2 FEAT_SVE2p2 Enable Scalable Vector Extension 2.2 instructions
+// CHECK-NEXT: sve2p2 FEAT_SVE2p2 Enable Armv9.6-A Scalable Vector Extension 2.2 instructions
// CHECK-NEXT: the FEAT_THE Enable Armv8.9-A Translation Hardening Extension
// CHECK-NEXT: tlbiw FEAT_TLBIW Enable Armv9.5-A TLBI VMALL for Dirty State
// CHECK-NEXT: tme FEAT_TME Enable Transactional Memory Extension
>From b18bd82d318c406d483ced487cc049e6ae0b945b Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Tue, 15 Oct 2024 12:36:51 +0000
Subject: [PATCH 4/4] Remove ext dependencies and test armv9.6 driver behavior
---
clang/test/Driver/aarch64-v96a.c | 42 +++++++++-
llvm/lib/Target/AArch64/AArch64Features.td | 16 ++--
.../TargetParser/TargetParserTest.cpp | 83 ++-----------------
3 files changed, 52 insertions(+), 89 deletions(-)
diff --git a/clang/test/Driver/aarch64-v96a.c b/clang/test/Driver/aarch64-v96a.c
index 0aaadddb2842f8..80c99be934334e 100644
--- a/clang/test/Driver/aarch64-v96a.c
+++ b/clang/test/Driver/aarch64-v96a.c
@@ -6,7 +6,7 @@
// RUN: %clang -target aarch64 -mlittle-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s
// RUN: %clang -target aarch64_be -mlittle-endian -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s
// RUN: %clang -target aarch64_be -mlittle-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A %s
-// GENERICV96A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"
+// GENERICV96A: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+cmpbr"{{.*}} "-target-feature" "+fprcvt"{{.*}} "-target-feature" "+sve2p2"
// RUN: %clang -target aarch64_be -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// RUN: %clang -target aarch64_be -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
@@ -14,6 +14,42 @@
// RUN: %clang -target aarch64 -mbig-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// RUN: %clang -target aarch64_be -mbig-endian -march=armv9.6a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
// RUN: %clang -target aarch64_be -mbig-endian -march=armv9.6-a -### -c %s 2>&1 | FileCheck -check-prefix=GENERICV96A-BE %s
-// GENERICV96A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"
-//
+// GENERICV96A-BE: "-cc1"{{.*}} "-triple" "aarch64_be{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+cmpbr"{{.*}} "-target-feature" "+fprcvt"{{.*}} "-target-feature" "+sve2p2"
+
// ===== Features supported on aarch64 =====
+
+// RUN: %clang -target aarch64 -march=armv9.6a+f8f16mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F16MM %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+f8f16mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F16MM %s
+// V96A-F8F16MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+f8f16mm"
+
+// RUN: %clang -target aarch64 -march=armv9.6a+f8f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F32MM %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+f8f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-F8F32MM %s
+// V96A-F8F32MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+f8f32mm"
+
+// RUN: %clang -target aarch64 -march=armv9.6a+lsfe -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSFE %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+lsfe -### -c %s 2>&1 | FileCheck -check-prefix=V96A-LSFE %s
+// V96A-LSFE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+lsfe"
+
+// RUN: %clang -target aarch64 -march=armv9.6a+sme2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SME2p2 %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+sme2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SME2p2 %s
+// V96A-SME2p2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sme2p2"
+
+// RUN: %clang -target aarch64 -march=armv9.6a+ssve-aes -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SSVE-AES %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+ssve-aes -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SSVE-AES %s
+// V96A-SSVE-AES: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+ssve-aes"
+
+// RUN: %clang -target aarch64 -march=armv9.6a+sve2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE2p2 %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+sve2p2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE2p2 %s
+// V96A-SVE2p2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve2p2"
+
+// RUN: %clang -target aarch64 -march=armv9.6a+sve-aes2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-AES2 %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+sve-aes2 -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-AES2 %s
+// V96A-SVE-AES2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-aes2"
+
+// RUN: %clang -target aarch64 -march=armv9.6a+sve-bfscale -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-BFSCALE %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+sve-bfscale -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-BFSCALE %s
+// V96A-SVE-BFSCALE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-bfscale"
+
+// RUN: %clang -target aarch64 -march=armv9.6a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s
+// RUN: %clang -target aarch64 -march=armv9.6-a+sve-f16f32mm -### -c %s 2>&1 | FileCheck -check-prefix=V96A-SVE-F16F32MM %s
+// V96A-SVE-F16F32MM: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic" "-target-feature" "+v9.6a"{{.*}} "-target-feature" "+sve-f16f32mm"
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 0f45cf481a8ada..831f311b236441 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -526,35 +526,35 @@ def FeatureCMPBR : ExtensionWithMArch<"cmpbr", "CMPBR", "FEAT_CMPBR",
"Enable Armv9.6-A base compare and branch instructions">;
def FeatureF8F32MM: ExtensionWithMArch<"f8f32mm", "F8F32MM", "FEAT_F8F32MM",
- "Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication", [FeatureFP8DOT4]>;
+ "Enable Armv9.6-A FP8 to Single-Precision Matrix Multiplication">;
def FeatureF8F16MM: ExtensionWithMArch<"f8f16mm", "F8F16MM", "FEAT_F8F16MM",
- "Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication", [FeatureFP8DOT2, FeatureF8F32MM]>;
+ "Enable Armv9.6-A FP8 to Half-Precision Matrix Multiplication">;
def FeatureFPRCVT: ExtensionWithMArch<"fprcvt", "FPRCVT", "FEAT_FPRCVT",
"Enable Armv9.6-A base convert instructions for SIMD&FP scalar register operands of"
- " different input and output sizes", [FeatureFPARMv8]>;
+ " different input and output sizes">;
def FeatureLSFE : ExtensionWithMArch<"lsfe", "LSFE", "FEAT_LSFE",
- "Enable Armv9.6-A base Atomic floating-point in-memory instructions", [FeatureFPARMv8]>;
+ "Enable Armv9.6-A base Atomic floating-point in-memory instructions">;
def FeatureSME2p2: ExtensionWithMArch<"sme2p2", "SME2p2", "FEAT_SME2p2",
"Enable Armv9.6-A Scalable Matrix Extension 2.2 instructions", [FeatureSME2p1]>;
def FeatureSSVE_AES : ExtensionWithMArch<"ssve-aes", "SSVE_AES", "FEAT_SSVE_AES",
- "Enable Armv9.6-A SVE2 AES support in streaming SVE mode", [FeatureSME2p1]>;
+ "Enable Armv9.6-A SVE2 AES support in streaming SVE mode">;
def FeatureSVE2p2 : ExtensionWithMArch<"sve2p2", "SVE2p2", "FEAT_SVE2p2",
"Enable Armv9.6-A Scalable Vector Extension 2.2 instructions", [FeatureSVE2p1]>;
def FeatureSVEAES2: ExtensionWithMArch<"sve-aes2", "SVE_AES2", "FEAT_SVE_AES2",
- "Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions", [FeatureSVE2AES]>;
+ "Enable Armv9.6-A SVE multi-vector AES and 128-bit PMULL instructions">;
def FeatureSVEBFSCALE: ExtensionWithMArch<"sve-bfscale", "SVE_BFSCALE", "FEAT_SVE_BFSCALE",
- "Enable Armv9.6-A SVE BFloat16 scaling instructions", [FeatureSVEB16B16]>;
+ "Enable Armv9.6-A SVE BFloat16 scaling instructions">;
def FeatureSVE_F16F32MM: ExtensionWithMArch<"sve-f16f32mm", "SVE_F16F32MM", "FEAT_SVE_F16F32MM",
- "Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions", [FeatureSVE2p1]>;
+ "Enable Armv9.6-A FP16 to FP32 Matrix Multiply instructions">;
//===----------------------------------------------------------------------===//
// Other Features
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 87f4ed970165f8..369e5346348014 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1772,14 +1772,6 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"nofp", "jscvt"}, {"fp-armv8", "jsconv"}, {}},
{AArch64::ARMV8A, {"jscvt", "nofp"}, {}, {"fp-armv8", "jsconv"}},
- // fp -> lsfe
- {AArch64::ARMV9_6A, {"nofp", "lsfe"}, {"fp-armv8", "lsfe"}, {}},
- {AArch64::ARMV9_6A, {"lsfe", "nofp"}, {}, {"fp-armv8", "lsfe"}},
-
- // fp -> fprcvt
- {AArch64::ARMV9_6A, {"nofp", "fprcvt"}, {"fp-armv8", "fprcvt"}, {}},
- {AArch64::ARMV9_6A, {"fprcvt", "nofp"}, {}, {"fp-armv8", "fprcvt"}},
-
// simd -> {aes, sha2, sha3, sm4}
{AArch64::ARMV8A, {"nosimd", "aes"}, {"neon", "aes"}, {}},
{AArch64::ARMV8A, {"aes", "nosimd"}, {}, {"neon", "aes"}},
@@ -1832,47 +1824,20 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"nosve2", "sve2-sm4"}, {"sve2", "sve2-sm4"}, {}},
{AArch64::ARMV8A, {"sve2-sm4", "nosve2"}, {}, {"sve2", "sve2-sm4"}},
- // sve-b16b16 -> {sme-b16b16, sve-bfscale}
- {AArch64::ARMV8A,
+ // sve-b16b16 -> {sme-b16b16}
+ {AArch64::ARMV9_4A,
{"nosve-b16b16", "sme-b16b16"},
{"sve-b16b16", "sme-b16b16"},
{}},
- {AArch64::ARMV9_6A,
+ {AArch64::ARMV9_4A,
{"sme-b16b16", "nosve-b16b16"},
{},
{"sve-b16b16", "sme-b16b16"}},
- {AArch64::ARMV9_6A,
- {"nosve-b16b16", "sve-bfscale"},
- {"sve-b16b16", "sve-bfscale"},
- {}},
- {AArch64::ARMV9_6A,
- {"sve-bfscale", "nosve-b16b16"},
- {},
- {"sve-b16b16", "sve-bfscale"}},
- // sve2p1 -> {sve2p2, sve-f16f32mm}
+ // sve2p1 -> {sve2p2}
{AArch64::ARMV9_6A, {"nosve2p1", "sve2p2"}, {"sve2p1", "sve2p2"}, {}},
{AArch64::ARMV9_6A, {"sve2p2", "nosve2p1"}, {}, {"sve2p1", "sve2p2"}},
- {AArch64::ARMV9_6A,
- {"nosve2p1", "sve-f16f32mm"},
- {"sve2p1", "sve-f16f32mm"},
- {}},
- {AArch64::ARMV9_6A,
- {"sve-f16f32mm", "nosve2p1"},
- {},
- {"sve2p1", "sve-f16f32mm"}},
-
- // sve2-aes -> {sve-aes2}
- {AArch64::ARMV9_6A,
- {"nosve2-aes", "sve-aes2"},
- {"sve2-aes", "sve-aes2"},
- {}},
- {AArch64::ARMV9_6A,
- {"sve-aes2", "nosve2-aes"},
- {},
- {"sve2-aes", "sve-aes2"}},
-
// sme -> {sme2, sme-f16f16, sme-f64f64, sme-i16i64, sme-fa64}
{AArch64::ARMV8A, {"nosme", "sme2"}, {"sme", "sme2"}, {}},
{AArch64::ARMV8A, {"sme2", "nosme"}, {}, {"sme", "sme2"}},
@@ -1920,17 +1885,9 @@ AArch64ExtensionDependenciesBaseArchTestParams
{AArch64::ARMV8A, {"nosme2", "sme-b16b16"}, {"sme2", "sme-b16b16"}, {}},
{AArch64::ARMV8A, {"sme-b16b16", "nosme2"}, {}, {"sme2", "sme-b16b16"}},
- // sme2p1 -> {sme2p2, ssve-aes}
+ // sme2p1 -> {sme2p2}
{AArch64::ARMV9_6A, {"nosme2p1", "sme2p2"}, {"sme2p2", "sme2p1"}, {}},
{AArch64::ARMV9_6A, {"sme2p2", "nosme2p1"}, {}, {"sme2p1", "sme2p2"}},
- {AArch64::ARMV9_6A,
- {"nosme2p1", "ssve-aes"},
- {"sme2p1", "ssve-aes"},
- {}},
- {AArch64::ARMV9_6A,
- {"ssve-aes", "nosme2p1"},
- {},
- {"ssve-aes", "sme2p1"}},
// fp8 -> {sme-f8f16, sme-f8f32}
{AArch64::ARMV8A, {"nofp8", "sme-f8f16"}, {"fp8", "sme-f8f16"}, {}},
@@ -1959,36 +1916,6 @@ AArch64ExtensionDependenciesBaseArchTestParams
// rcpc -> rcpc3
{AArch64::ARMV8A, {"norcpc", "rcpc3"}, {"rcpc", "rcpc3"}, {}},
{AArch64::ARMV8A, {"rcpc3", "norcpc"}, {}, {"rcpc", "rcpc3"}},
-
- // fp8dot4 -> f8f32mm
- {AArch64::ARMV9_6A,
- {"nofp8dot4", "f8f32mm"},
- {"fp8dot4", "f8f32mm"},
- {}},
- {AArch64::ARMV9_6A,
- {"f8f32mm", "nofp8dot4"},
- {},
- {"f8f32mm", "fp8dot4"}},
-
- // f8f32mm -> f8f16mm
- {AArch64::ARMV9_6A,
- {"nof8f32mm", "f8f16mm"},
- {"f8f16mm", "f8f32mm"},
- {}},
- {AArch64::ARMV9_6A,
- {"f8f16mm", "nof8f32mm"},
- {},
- {"f8f16mm", "f8f32mm"}},
-
- // fp8dot2 -> f8f16mm
- {AArch64::ARMV9_6A,
- {"nofp8dot2", "f8f16mm"},
- {"f8f16mm", "fp8dot2"},
- {}},
- {AArch64::ARMV9_6A,
- {"f8f16mm", "nofp8dot2"},
- {},
- {"f8f16mm", "fp8dot2"}},
};
INSTANTIATE_TEST_SUITE_P(
More information about the cfe-commits mailing list