[llvm] [NFC][AArch64] Organise extensions by archtecture version (PR #95898)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 18 08:30:53 PDT 2024
================
@@ -223,135 +221,509 @@ def FeatureSVE : Extension<"sve", "SVE",
"Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16],
"FEAT_SVE", "+sve,+fullfp16,+fp-armv8,+neon", 310>;
-// This flag is currently still labeled as Experimental, but when fully
-// implemented this should tell the compiler to use the zeroing pseudos to
-// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
-// lanes are known to be zero. The pseudos will then be expanded using the
-// MOVPRFX instruction to zero the inactive lanes. This feature should only be
-// enabled if MOVPRFX instructions are known to merge with the destructive
-// operations they prefix.
-//
-// This feature could similarly be extended to support cheap merging of _any_
-// value into the inactive lanes using the MOVPRFX instruction that uses
-// merging-predication.
-def FeatureExperimentalZeroingPseudos
- : SubtargetFeature<"use-experimental-zeroing-pseudos",
- "UseExperimentalZeroingPseudos", "true",
- "Hint to the compiler that the MOVPRFX instruction is "
- "merged with destructive operations",
- []>;
+let ArchExtKindSpelling = "AEK_I8MM" in
+def FeatureMatMulInt8 : Extension<"i8mm", "MatMulInt8",
+ "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)", [],
+ "FEAT_I8MM", "+i8mm", 270>;
-def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
- "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+let ArchExtKindSpelling = "AEK_F32MM" in
+def FeatureMatMulFP32 : Extension<"f32mm", "MatMulFP32",
+ "Enable Matrix Multiply FP32 Extension (FEAT_F32MM)", [FeatureSVE],
+ "FEAT_SVE_F32MM", "+sve,+f32mm,+fullfp16,+fp-armv8,+neon", 350>;
-def FeatureBF16 : Extension<"bf16", "BF16",
- "Enable BFloat16 Extension (FEAT_BF16)", [],
- "FEAT_BF16", "+bf16", 280>;
+let ArchExtKindSpelling = "AEK_F64MM" in
+def FeatureMatMulFP64 : Extension<"f64mm", "MatMulFP64",
+ "Enable Matrix Multiply FP64 Extension (FEAT_F64MM)", [FeatureSVE],
+ "FEAT_SVE_F64MM", "+sve,+f64mm,+fullfp16,+fp-armv8,+neon", 360>;
-def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
- "NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;
+//===----------------------------------------------------------------------===//
+// Armv8.3 Architecture Extensions
+//===----------------------------------------------------------------------===//
-def FeatureSVE2 : Extension<"sve2", "SVE2",
- "Enable Scalable Vector Extension 2 (SVE2) instructions (FEAT_SVE2)",
- [FeatureSVE, FeatureUseScalarIncVL],
- "FEAT_SVE2", "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370>;
+def FeatureRCPC : Extension<"rcpc", "RCPC",
+ "Enable support for RCPC extension (FEAT_LRCPC)", [],
+ "FEAT_RCPC", "+rcpc", 230>;
-def FeatureSVE2AES : Extension<"sve2-aes", "SVE2AES",
- "Enable AES SVE2 instructions (FEAT_SVE_AES, FEAT_SVE_PMULL128)",
- [FeatureSVE2, FeatureAES],
- "FEAT_SVE_AES", "+sve2,+sve,+sve2-aes,+fullfp16,+fp-armv8,+neon", 380>;
+def FeaturePAuth : Extension<
+ "pauth", "PAuth",
+ "Enable v8.3-A Pointer Authentication extension (FEAT_PAuth)">;
-def FeatureSVE2SM4 : Extension<"sve2-sm4", "SVE2SM4",
- "Enable SM4 SVE2 instructions (FEAT_SVE_SM4)", [FeatureSVE2, FeatureSM4],
- "FEAT_SVE_SM4", "+sve2,+sve,+sve2-sm4,+fullfp16,+fp-armv8,+neon", 420>;
+let ArchExtKindSpelling = "AEK_JSCVT", MArchName = "jscvt" in
+def FeatureJS : Extension<
+ "jsconv", "JS",
+ "Enable v8.3-A JavaScript FP conversion instructions (FEAT_JSCVT)",
+ [FeatureFPARMv8],
+ "FEAT_JSCVT", "+fp-armv8,+neon,+jsconv", 210>;
-def FeatureSVE2SHA3 : Extension<"sve2-sha3", "SVE2SHA3",
- "Enable SHA3 SVE2 instructions (FEAT_SVE_SHA3)", [FeatureSVE2, FeatureSHA3],
- "FEAT_SVE_SHA3", "+sve2,+sve,+sve2-sha3,+fullfp16,+fp-armv8,+neon", 410>;
+def FeatureCCIDX : SubtargetFeature<
+ "ccidx", "HasCCIDX", "true",
+ "Enable v8.3-A Extend of the CCSIDR number of sets (FEAT_CCIDX)">;
-def FeatureSVE2BitPerm : Extension<"sve2-bitperm", "SVE2BitPerm",
- "Enable bit permutation SVE2 instructions (FEAT_SVE_BitPerm)", [FeatureSVE2],
- "FEAT_SVE_BITPERM", "+sve2,+sve,+sve2-bitperm,+fullfp16,+fp-armv8,+neon", 400>;
+let ArchExtKindSpelling = "AEK_FCMA", MArchName = "fcma" in
+def FeatureComplxNum : Extension<
+ "complxnum", "ComplxNum",
+ "Enable v8.3-A Floating-point complex number support (FEAT_FCMA)",
+ [FeatureNEON],
+ "FEAT_FCMA", "+fp-armv8,+neon,+complxnum", 220>;
-let FMVDependencies = "+sve2p1,+sve2,+sve,+fullfp16,+fp-armv8,+neon" in
-def FeatureSVE2p1: Extension<"sve2p1", "SVE2p1",
- "Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>;
+def FeatureNV : SubtargetFeature<
+ "nv", "HasNV", "true",
+ "Enable v8.4-A Nested Virtualization Enchancement (FEAT_NV, FEAT_NV2)">;
-def FeatureB16B16 : Extension<"b16b16", "B16B16",
- "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>;
+//===----------------------------------------------------------------------===//
+// Armv8.4 Architecture Extensions
+//===----------------------------------------------------------------------===//
-def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
- "Has zero-cycle register moves">;
+def FeatureLSE2 : SubtargetFeature<"lse2", "HasLSE2", "true",
+ "Enable ARMv8.4 Large System Extension 2 (LSE2) atomicity rules (FEAT_LSE2)">;
-def FeatureZCZeroingGP : SubtargetFeature<"zcz-gp", "HasZeroCycleZeroingGP", "true",
- "Has zero-cycle zeroing instructions for generic registers">;
+def FeatureFP16FML : Extension<"fp16fml", "FP16FML",
+ "Enable FP16 FML instructions (FEAT_FHM)", [FeatureFullFP16],
+ "FEAT_FP16FML", "+fp16fml,+fullfp16,+fp-armv8,+neon", 175>;
-// It is generally beneficial to rewrite "fmov s0, wzr" to "movi d0, #0".
-// as movi is more efficient across all cores. Newer cores can eliminate
-// fmovs early and there is no difference with movi, but this not true for
-// all implementations.
-def FeatureNoZCZeroingFP : SubtargetFeature<"no-zcz-fp", "HasZeroCycleZeroingFP", "false",
- "Has no zero-cycle zeroing instructions for FP registers">;
+def FeatureDotProd : Extension<
+ "dotprod", "DotProd",
+ "Enable dot product support (FEAT_DotProd)", [FeatureNEON],
+ "FEAT_DOTPROD", "+dotprod,+fp-armv8,+neon", 104>;
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
- "Has zero-cycle zeroing instructions",
- [FeatureZCZeroingGP]>;
+def FeatureMPAM : SubtargetFeature<
+ "mpam", "HasMPAM", "true",
+ "Enable v8.4-A Memory system Partitioning and Monitoring extension (FEAT_MPAM)">;
-/// ... but the floating-point version doesn't quite work in rare cases on older
-/// CPUs.
-def FeatureZCZeroingFPWorkaround : SubtargetFeature<"zcz-fp-workaround",
- "HasZeroCycleZeroingFPWorkaround", "true",
- "The zero-cycle floating-point zeroing instruction has a bug">;
+def FeatureDIT : Extension<
+ "dit", "DIT",
+ "Enable v8.4-A Data Independent Timing instructions (FEAT_DIT)", [],
+ "FEAT_DIT", "+dit", 180>;
-def FeatureStrictAlign : SubtargetFeature<"strict-align",
- "RequiresStrictAlign", "true",
- "Disallow all unaligned memory "
- "access">;
+def FeatureTRACEV8_4 : SubtargetFeature<
+ "tracev8.4", "HasTRACEV8_4", "true",
+ "Enable v8.4-A Trace extension (FEAT_TRF)">;
-foreach i = {1-7,9-15,18,20-28} in
- def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true",
- "Reserve X"#i#", making it unavailable "
- "as a GPR">;
+def FeatureAM : SubtargetFeature<
+ "am", "HasAM", "true",
+ "Enable v8.4-A Activity Monitors extension (FEAT_AMUv1)">;
-foreach i = {8-15,18} in
- def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
- "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
+def FeatureSEL2 : SubtargetFeature<
+ "sel2", "HasSEL2", "true",
+ "Enable v8.4-A Secure Exception Level 2 extension (FEAT_SEL2)">;
-def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
- "true",
- "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
+def FeatureTLB_RMI : SubtargetFeature<
+ "tlb-rmi", "HasTLB_RMI", "true",
+ "Enable v8.4-A TLB Range and Maintenance Instructions (FEAT_TLBIOS, FEAT_TLBIRANGE)">;
-def FeaturePredictableSelectIsExpensive : SubtargetFeature<
- "predictable-select-expensive", "PredictableSelectIsExpensive", "true",
- "Prefer likely predicted branches over selects">;
+def FeatureFlagM : Extension<
+ "flagm", "FlagM",
+ "Enable v8.4-A Flag Manipulation Instructions (FEAT_FlagM)", [],
+ "FEAT_FLAGM", "+flagm", 20>;
-def FeatureEnableSelectOptimize : SubtargetFeature<
- "enable-select-opt", "EnableSelectOptimize", "true",
- "Enable the select optimize pass for select loop heuristics">;
+def FeatureRCPC_IMMO : SubtargetFeature<"rcpc-immo", "HasRCPC_IMMO", "true",
+ "Enable v8.4-A RCPC instructions with Immediate Offsets (FEAT_LRCPC2)",
+ [FeatureRCPC]>;
-def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
- "HasExynosCheapAsMoveHandling", "true",
- "Use Exynos specific handling of cheap instructions">;
+//===----------------------------------------------------------------------===//
+// Armv8.5 Architecture Extensions
+//===----------------------------------------------------------------------===//
-def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
- "UsePostRAScheduler", "true", "Schedule again after register allocation">;
+def FeatureAltFPCmp : SubtargetFeature<"altnzcv", "HasAlternativeNZCV", "true",
+ "Enable alternative NZCV format for floating point comparisons (FEAT_FlagM2)">;
-def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
- "IsMisaligned128StoreSlow", "true", "Misaligned 128 bit stores are slow">;
+def FeatureFRInt3264 : SubtargetFeature<"fptoint", "HasFRInt3264", "true",
+ "Enable FRInt[32|64][Z|X] instructions that round a floating-point number to "
+ "an integer (in FP format) forcing it to fit into a 32- or 64-bit int (FEAT_FRINTTS)" >;
-def FeatureSlowPaired128 : SubtargetFeature<"slow-paired-128",
- "IsPaired128Slow", "true", "Paired 128 bit loads and stores are slow">;
+def FeatureSB : Extension<"sb", "SB",
+ "Enable v8.5 Speculation Barrier (FEAT_SB)", [],
+ "FEAT_SB", "+sb", 470>;
-def FeatureAscendStoreAddress : SubtargetFeature<"ascend-store-address",
- "IsStoreAddressAscend", "true",
- "Schedule vector stores by ascending address">;
+def FeatureSSBS : Extension<"ssbs", "SSBS",
+ "Enable Speculative Store Bypass Safe bit (FEAT_SSBS, FEAT_SSBS2)", [],
+ "FEAT_SSBS", "", 490>;
-def FeatureSlowSTRQro : SubtargetFeature<"slow-strqro-store", "IsSTRQroSlow",
- "true", "STR of Q register with register offset is slow">;
+def FeaturePredRes : Extension<"predres", "PredRes",
+ "Enable v8.5a execution and data prediction invalidation instructions (FEAT_SPECRES)", [],
+ "FEAT_PREDRES", "+predres", 480>;
-def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
- "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
- "true", "Use alternative pattern for sextload convert to f32">;
+def FeatureCacheDeepPersist : SubtargetFeature<"ccdp", "CCDP", "true",
+ "Enable v8.5 Cache Clean to Point of Deep Persistence (FEAT_DPB2)" >;
+
+let ArchExtKindSpelling = "AEK_NONE" in
+def FeatureBranchTargetId : Extension<"bti", "BTI",
+ "Enable Branch Target Identification (FEAT_BTI)", [],
+ "FEAT_BTI", "+bti", 510>;
+
+let ArchExtKindSpelling = "AEK_RAND", MArchName = "rng" in
+def FeatureRandGen : Extension<"rand", "RandGen",
+ "Enable Random Number generation instructions (FEAT_RNG)", [],
+ "FEAT_RNG", "+rand", 10>;
+
+// NOTE: "memtag" means FEAT_MTE + FEAT_MTE2 for -march or
+// __attribute((target(...))), but only FEAT_MTE for FMV.
+let MArchName = "memtag" in
+def FeatureMTE : Extension<"mte", "MTE",
+ "Enable Memory Tagging Extension (FEAT_MTE, FEAT_MTE2)", [],
+ "FEAT_MEMTAG", "", 440>;
+
+//===----------------------------------------------------------------------===//
+// Armv8.6 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureBF16 : Extension<"bf16", "BF16",
+ "Enable BFloat16 Extension (FEAT_BF16)", [],
+ "FEAT_BF16", "+bf16", 280>;
+
+def FeatureAMVS : SubtargetFeature<
+ "amvs", "HasAMVS", "true",
+ "Enable v8.6-A Activity Monitors Virtualization support (FEAT_AMUv1p1)",
+ [FeatureAM]>;
+
+def FeatureFineGrainedTraps : SubtargetFeature<"fgt", "HasFineGrainedTraps",
+ "true", "Enable fine grained virtualization traps extension (FEAT_FGT)">;
+
+def FeatureEnhancedCounterVirtualization :
+ SubtargetFeature<"ecv", "HasEnhancedCounterVirtualization",
+ "true", "Enable enhanced counter virtualization extension (FEAT_ECV)">;
+
+//===----------------------------------------------------------------------===//
+// Armv8.7 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureXS : SubtargetFeature<"xs", "HasXS",
+ "true", "Enable Armv8.7-A limited-TLB-maintenance instruction (FEAT_XS)">;
+
+def FeatureWFxT : Extension<"wfxt", "WFxT",
+ "Enable Armv8.7-A WFET and WFIT instruction (FEAT_WFxT)", [],
+ "FEAT_WFXT", "+wfxt", 550>;
+
+def FeatureHCX : SubtargetFeature<
+ "hcx", "HasHCX", "true", "Enable Armv8.7-A HCRX_EL2 system register (FEAT_HCX)">;
+
+def FeatureLS64 : Extension<"ls64", "LS64",
+ "Enable Armv8.7-A LD64B/ST64B Accelerator Extension (FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA)", [],
+ "FEAT_LS64", "", 520>;
+
+def FeatureSPE_EEF : SubtargetFeature<"spe-eef", "HasSPE_EEF",
+ "true", "Enable extra register in the Statistical Profiling Extension (FEAT_SPEv1p2)">;
+
+//===----------------------------------------------------------------------===//
+// Armv8.8 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureHBC : Extension<"hbc", "HBC",
+ "Enable Armv8.8-A Hinted Conditional Branches Extension (FEAT_HBC)">;
+
+def FeatureMOPS : Extension<"mops", "MOPS",
+ "Enable Armv8.8-A memcpy and memset acceleration instructions (FEAT_MOPS)", [],
+ "FEAT_MOPS", "+mops", 650>;
+
+def FeatureNMI : SubtargetFeature<"nmi", "HasNMI",
+ "true", "Enable Armv8.8-A Non-maskable Interrupts (FEAT_NMI, FEAT_GICv3_NMI)">;
+
+//===----------------------------------------------------------------------===//
+// Armv8.9 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureRASv2 : Extension<"rasv2", "RASv2",
+ "Enable ARMv8.9-A Reliability, Availability and Serviceability Extensions (FEAT_RASv2)",
+ [FeatureRAS]>;
+
+def FeatureCSSC : Extension<"cssc", "CSSC",
+ "Enable Common Short Sequence Compression (CSSC) instructions (FEAT_CSSC)">;
+
+def FeatureCLRBHB : SubtargetFeature<"clrbhb", "HasCLRBHB",
+ "true", "Enable Clear BHB instruction (FEAT_CLRBHB)">;
+
+def FeaturePRFM_SLC : SubtargetFeature<"prfm-slc-target", "HasPRFM_SLC",
+ "true", "Enable SLC target for PRFM instruction">;
+
+let MArchName = "predres2" in
+def FeatureSPECRES2 : Extension<"specres2", "SPECRES2",
+ "Enable Speculation Restriction Instruction (FEAT_SPECRES2)",
+ [FeaturePredRes]>;
+
+def FeatureRCPC3 : Extension<"rcpc3", "RCPC3",
+ "Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set (FEAT_LRCPC3)",
+ [FeatureRCPC_IMMO],
+ "FEAT_RCPC3", "+rcpc,+rcpc3", 241>;
+
+def FeatureTHE : Extension<"the", "THE",
+ "Enable Armv8.9-A Translation Hardening Extension (FEAT_THE)">;
+
+//===----------------------------------------------------------------------===//
+// Armv9.0 Architecture Extensions
+//===----------------------------------------------------------------------===//
+
+def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
+ "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
----------------
davemgreen wrote:
Ah I see. I didn't realize they needed to be ordered.
https://github.com/llvm/llvm-project/pull/95898
More information about the llvm-commits
mailing list