[llvm] [llvm][AArch64] Support -mcpu=apple-m4 (PR #95478)
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 13 15:01:13 PDT 2024
https://github.com/jroelofs created https://github.com/llvm/llvm-project/pull/95478
None
>From 1461be872bf26e2e0f2572f688a45af795421432 Mon Sep 17 00:00:00 2001
From: Jon Roelofs <jonathan_roelofs at apple.com>
Date: Thu, 13 Jun 2024 10:27:52 -0700
Subject: [PATCH] [llvm][AArch64] Support -mcpu=apple-m4
---
.../llvm/TargetParser/AArch64TargetParser.h | 9 +++++-
llvm/lib/Target/AArch64/AArch64Processors.td | 31 +++++++++++++++++--
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 ++
.../TargetParser/TargetParserTest.cpp | 17 +++++++++-
4 files changed, 55 insertions(+), 4 deletions(-)
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index df8e685eb6667..c1a68a0ec5c19 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -521,7 +521,14 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::ExtensionBitset({AArch64::AEK_AES, AArch64::AEK_SHA2,
AArch64::AEK_SHA3, AArch64::AEK_FP16,
AArch64::AEK_FP16FML})},
-
+ // Technically apple-m4 is ARMv9.2a, but a quirk of LLVM defines v9.0 as
+ // requiring SVE, which is optional according to the Arm ARM and not
+ // supported by the core. ARMv8.7a is the next closest choice.
+ {"apple-m4", ARMV8_7A,
+ AArch64::ExtensionBitset(
+ {AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_SHA3,
+ AArch64::AEK_FP16, AArch64::AEK_FP16FML, AArch64::AEK_SME,
+ AArch64::AEK_SME2, AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64})},
{"apple-s4", ARMV8_3A,
AArch64::ExtensionBitset(
{AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_FP16})},
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index c04c20c78e8eb..57df6b85ab11d 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -398,6 +398,22 @@ def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
FeatureZCRegMove,
FeatureZCZeroing]>;
+def TuneAppleM4 : SubtargetFeature<"apple-m4", "ARMProcFamily", "AppleM4",
+ "Apple M4", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseCryptoEOR,
+ FeatureFuseLiterals,
+ FeatureZCRegMove,
+ FeatureZCZeroing
+ ]>;
+
def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
[FeatureExynosCheapAsMoveHandling,
@@ -784,6 +800,14 @@ def ProcessorFeatures {
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureHCX];
+ // Technically apple-m4 is ARMv9.2. See the corresponding comment in
+ // AArch64TargetParser.h.
+ list<SubtargetFeature> AppleM4 = [HasV8_7aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureSHA3,
+ FeatureFullFP16, FeatureFP16FML,
+ FeatureAES, FeatureBF16,
+ FeatureSME2,
+ FeatureSMEF64F64, FeatureSMEI16I64];
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
FeaturePerfMon];
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
@@ -1010,6 +1034,9 @@ def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,
[TuneAppleA16]>;
def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17,
[TuneAppleA17]>;
+def : ProcessorModel<"apple-m4", CycloneModel, ProcessorFeatures.AppleM4,
+ [TuneAppleM4]>;
+
// Mac CPUs
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
@@ -1025,8 +1052,8 @@ def : ProcessorModel<"apple-s5", CycloneModel, ProcessorFeatures.AppleA12,
[TuneAppleA12]>;
// Alias for the latest Apple processor model supported by LLVM.
-def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleA16,
- [TuneAppleA16]>;
+def : ProcessorModel<"apple-latest", CycloneModel, ProcessorFeatures.AppleM4,
+ [TuneAppleM4]>;
// Fujitsu A64FX
def : ProcessorModel<"a64fx", A64FXModel, ProcessorFeatures.A64FX,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 2920066cfdcff..1fad1d5ca6d7d 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -182,6 +182,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
case AppleA15:
case AppleA16:
case AppleA17:
+ case AppleM4:
CacheLineSize = 64;
PrefetchDistance = 280;
MinPrefetchStride = 2048;
@@ -191,6 +192,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
case AppleA15:
case AppleA16:
case AppleA17:
+ case AppleM4:
MaxInterleaveFactor = 4;
break;
default:
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 23555dfc68dc0..ccc101e907441 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1644,6 +1644,21 @@ INSTANTIATE_TEST_SUITE_P(
AArch64::AEK_I8MM, AArch64::AEK_JSCVT, AArch64::AEK_FCMA,
AArch64::AEK_PAUTH}),
"8.6-A"),
+ ARMCPUTestParams<AArch64::ExtensionBitset>(
+ "apple-m4", "armv8.7-a", "crypto-neon-fp-armv8",
+ AArch64::ExtensionBitset(
+ {AArch64::AEK_CRC, AArch64::AEK_AES,
+ AArch64::AEK_SHA2, AArch64::AEK_SHA3,
+ AArch64::AEK_FP, AArch64::AEK_SIMD,
+ AArch64::AEK_LSE, AArch64::AEK_RAS,
+ AArch64::AEK_RDM, AArch64::AEK_RCPC,
+ AArch64::AEK_DOTPROD, AArch64::AEK_FP16,
+ AArch64::AEK_FP16FML, AArch64::AEK_BF16,
+ AArch64::AEK_I8MM, AArch64::AEK_JSCVT,
+ AArch64::AEK_FCMA, AArch64::AEK_PAUTH,
+ AArch64::AEK_SME, AArch64::AEK_SME2,
+ AArch64::AEK_SMEF64F64, AArch64::AEK_SMEI16I64}),
+ "8.7-A"),
ARMCPUTestParams<AArch64::ExtensionBitset>(
"apple-s4", "armv8.3-a", "crypto-neon-fp-armv8",
AArch64::ExtensionBitset(
@@ -1872,7 +1887,7 @@ INSTANTIATE_TEST_SUITE_P(
ARMCPUTestParams<AArch64::ExtensionBitset>::PrintToStringParamName);
// Note: number of CPUs includes aliases.
-static constexpr unsigned NumAArch64CPUArchs = 79;
+static constexpr unsigned NumAArch64CPUArchs = 80;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
More information about the llvm-commits
mailing list