[clang] [llvm] [AArch64] Add initial support for -mcpu=olympus. (PR #132368)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 24 04:36:22 PDT 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/132368
>From b9725e115876f26311edd408b9d4521ae8a03ebd Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 4 Dec 2024 05:42:38 -0800
Subject: [PATCH 1/2] [AArch64] Add initial support for -mcpu=olympus.
This patch adds support for the NVIDIA Olympus core.
It does not add any special tuning decisions; those may come
later.
---
clang/test/Driver/aarch64-nvidia-olympus.c | 13 +++
.../aarch64-olympus.c | 82 +++++++++++++++++++
.../Misc/target-invalid-cpu-note/aarch64.c | 1 +
llvm/lib/Target/AArch64/AArch64Processors.td | 25 ++++++
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 9 ++
llvm/lib/TargetParser/Host.cpp | 1 +
llvm/test/CodeGen/AArch64/cpus.ll | 1 +
llvm/unittests/TargetParser/Host.cpp | 4 +
.../TargetParser/TargetParserTest.cpp | 3 +-
9 files changed, 138 insertions(+), 1 deletion(-)
create mode 100644 clang/test/Driver/aarch64-nvidia-olympus.c
create mode 100644 clang/test/Driver/print-enabled-extensions/aarch64-olympus.c
diff --git a/clang/test/Driver/aarch64-nvidia-olympus.c b/clang/test/Driver/aarch64-nvidia-olympus.c
new file mode 100644
index 0000000000000..e832d06917a25
--- /dev/null
+++ b/clang/test/Driver/aarch64-nvidia-olympus.c
@@ -0,0 +1,13 @@
+// RUN: %clang --target=aarch64 -mcpu=olympus -### -c %s 2>&1 | FileCheck -check-prefix=olympus %s
+// RUN: %clang --target=aarch64 -mlittle-endian -mcpu=olympus -### -c %s 2>&1 | FileCheck -check-prefix=olympus %s
+// RUN: %clang --target=aarch64 -mtune=olympus -### -c %s 2>&1 | FileCheck -check-prefix=olympus-TUNE %s
+// RUN: %clang --target=aarch64 -mlittle-endian -mtune=olympus -### -c %s 2>&1 | FileCheck -check-prefix=olympus-TUNE %s
+// olympus: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "olympus"
+// olympus-TUNE: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "generic"
+
+// RUN: %clang --target=arm64 -mcpu=olympus -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-olympus %s
+// RUN: %clang --target=arm64 -mlittle-endian -mcpu=olympus -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-olympus %s
+// RUN: %clang --target=arm64 -mtune=olympus -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-olympus-TUNE %s
+// RUN: %clang --target=arm64 -mlittle-endian -mtune=olympus -### -c %s 2>&1 | FileCheck -check-prefix=ARM64-olympus-TUNE %s
+// ARM64-olympus: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "olympus"
+// ARM64-olympus-TUNE: "-cc1"{{.*}} "-triple" "arm64{{.*}}" "-target-cpu" "generic"
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-olympus.c b/clang/test/Driver/print-enabled-extensions/aarch64-olympus.c
new file mode 100644
index 0000000000000..a37ec4ac6aa7d
--- /dev/null
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-olympus.c
@@ -0,0 +1,82 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=olympus | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s
+
+// CHECK: Extensions enabled for the given AArch64 target
+// CHECK-EMPTY:
+// CHECK-NEXT: Architecture Feature(s) Description
+// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support
+// CHECK-NEXT: FEAT_AMUv1 Enable Armv8.4-A Activity Monitors extension
+// CHECK-NEXT: FEAT_AMUv1p1 Enable Armv8.6-A Activity Monitors Virtualization support
+// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions
+// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension
+// CHECK-NEXT: FEAT_BRBE Enable Branch Record Buffer Extension
+// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification
+// CHECK-NEXT: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets
+// CHECK-NEXT: FEAT_CHK Enable Armv8.0-A Check Feature Status Extension
+// CHECK-NEXT: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions
+// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction
+// CHECK-NEXT: FEAT_Crypto Enable cryptographic instructions
+// CHECK-NEXT: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
+// CHECK-NEXT: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence
+// CHECK-NEXT: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence
+// CHECK-NEXT: FEAT_DotProd Enable dot product support
+// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension
+// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension
+// CHECK-NEXT: FEAT_FAMINMAX Enable FAMIN and FAMAX instructions
+// CHECK-NEXT: FEAT_FCMA Enable Armv8.3-A Floating-point complex number support
+// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension
+// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions
+// CHECK-NEXT: FEAT_FP Enable Armv8.0-A Floating Point Extensions
+// CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing
+// CHECK-NEXT: FEAT_FP8 Enable FP8 instructions
+// CHECK-NEXT: FEAT_FP8DOT2 Enable FP8 2-way dot instructions
+// CHECK-NEXT: FEAT_FP8DOT4 Enable FP8 4-way dot instructions
+// CHECK-NEXT: FEAT_FP8FMA Enable Armv9.5-A FP8 multiply-add instructions
+// CHECK-NEXT: FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement
+// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int
+// CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions
+// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons
+// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register
+// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension
+// CHECK-NEXT: FEAT_JSCVT Enable Armv8.3-A JavaScript FP conversion instructions
+// CHECK-NEXT: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension
+// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension
+// CHECK-NEXT: FEAT_LRCPC2 Enable Armv8.4-A RCPC instructions with Immediate Offsets
+// CHECK-NEXT: FEAT_LS64, FEAT_LS64_V, FEAT_LS64_ACCDATA Enable Armv8.7-A LD64B/ST64B Accelerator Extension
+// CHECK-NEXT: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions
+// CHECK-NEXT: FEAT_LSE2 Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules
+// CHECK-NEXT: FEAT_LUT Enable Lookup Table instructions
+// CHECK-NEXT: FEAT_MEC Enable Memory Encryption Contexts Extension
+// CHECK-NEXT: FEAT_MPAM Enable Armv8.4-A Memory system Partitioning and Monitoring extension
+// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension
+// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable Armv8.4-A Nested Virtualization Enchancement
+// CHECK-NEXT: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension
+// CHECK-NEXT: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants
+// CHECK-NEXT: FEAT_PAuth Enable Armv8.3-A Pointer Authentication extension
+// CHECK-NEXT: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension
+// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions
+// CHECK-NEXT: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions
+// CHECK-NEXT: FEAT_RME Enable Realm Management Extension
+// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions
+// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
+// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
+// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support
+// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
+// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support
+// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension
+// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
+// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
+// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
+// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
+// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
+// CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable SVE AES and quadword SVE polynomial multiply instructions
+// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
+// CHECK-NEXT: FEAT_SVE_SHA3 Enable SHA3 SVE2 instructions
+// CHECK-NEXT: FEAT_SVE_SM4 Enable SM4 SVE2 instructions
+// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
+// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension
+// CHECK-NEXT: FEAT_TRF Enable Armv8.4-A Trace extension
+// CHECK-NEXT: FEAT_UAO Enable Armv8.2-A UAO PState
+// CHECK-NEXT: FEAT_VHE Enable Armv8.1-A Virtual Host extension
+// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction
+// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction
diff --git a/clang/test/Misc/target-invalid-cpu-note/aarch64.c b/clang/test/Misc/target-invalid-cpu-note/aarch64.c
index 98a2ca0447bcf..e8e728a27e410 100644
--- a/clang/test/Misc/target-invalid-cpu-note/aarch64.c
+++ b/clang/test/Misc/target-invalid-cpu-note/aarch64.c
@@ -86,6 +86,7 @@
// CHECK-SAME: {{^}}, neoverse-v2
// CHECK-SAME: {{^}}, neoverse-v3
// CHECK-SAME: {{^}}, neoverse-v3ae
+// CHECK-SAME: {{^}}, olympus
// CHECK-SAME: {{^}}, oryon-1
// CHECK-SAME: {{^}}, saphira
// CHECK-SAME: {{^}}, thunderx
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 30d9372e4afd1..7527df3860953 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -284,6 +284,17 @@ def TuneMONAKA : SubtargetFeature<"fujitsu-monaka", "ARMProcFamily", "MONAKA",
def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
"Nvidia Carmel processors">;
+def TuneOlympus : SubtargetFeature<"olympus", "ARMProcFamily", "Olympus",
+ "NVIDIA Olympus processors", [
+ FeatureALULSLFast,
+ FeatureCmpBccFusion,
+ FeatureEnableSelectOptimize,
+ FeatureFuseAES,
+ FeatureFuseAdrpAdd,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive,
+ FeatureUseFixedOverScalableIfEqualCost]>;
+
// Note that cyclone does not fuse AES instructions, but newer apple chips do
// perform the fusion and cyclone is used by default when targetting apple OSes.
def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
@@ -872,6 +883,16 @@ def ProcessorFeatures {
list<SubtargetFeature> Carmel = [HasV8_2aOps, FeatureNEON, FeatureSHA2, FeatureAES,
FeatureFullFP16, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM,
FeatureFPARMv8];
+ list<SubtargetFeature> Olympus = [HasV9_2aOps, FeatureBRBE, FeatureCCIDX,
+ FeatureCHK, FeatureCrypto, FeatureETE,
+ FeatureFAMINMAX, FeatureFP16FML,
+ FeatureFP8DOT2, FeatureFP8DOT4,
+ FeatureFP8FMA, FeatureFPAC, FeatureLS64,
+ FeatureLUT, FeatureMEC, FeatureMTE,
+ FeaturePerfMon, FeatureRandGen, FeatureSPE,
+ FeatureSPE_EEF, FeatureSSBS,
+ FeatureSVEBitPerm, FeatureSVE2SHA3,
+ FeatureSVE2SM4, FeatureSVEAES];
list<SubtargetFeature> AppleA7 = [HasV8_0aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
FeatureNEON,FeaturePerfMon];
list<SubtargetFeature> AppleA10 = [HasV8_0aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
@@ -1266,6 +1287,10 @@ def : ProcessorModel<"fujitsu-monaka", A64FXModel, ProcessorFeatures.MONAKA,
def : ProcessorModel<"carmel", NoSchedModel, ProcessorFeatures.Carmel,
[TuneCarmel]>;
+// NVIDIA Olympus
+def : ProcessorModel<"olympus", NeoverseV2Model, ProcessorFeatures.Olympus,
+ [TuneOlympus]>;
+
// Ampere Computing
def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1,
[TuneAmpere1]>;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index f7defe79c6d31..72b3da8d19876 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -349,6 +349,15 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
PrefetchDistance = 128;
MinPrefetchStride = 1024;
break;
+ case Olympus:
+ EpilogueVectorizationMinVF = 8;
+ MaxInterleaveFactor = 4;
+ ScatterOverhead = 13;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(32);
+ MaxBytesForLoopAlignment = 16;
+ VScaleForTuning = 1;
+ break;
}
if (AArch64MinimumJumpTableEntries.getNumOccurrences() > 0 || !HasMinSize)
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index d6a16143fe9e9..48c07688b3d9c 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -288,6 +288,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
if (Implementer == "0x4e") { // NVIDIA Corporation
return StringSwitch<const char *>(Part)
.Case("0x004", "carmel")
+ .Case("0x10", "olympus")
.Default("generic");
}
diff --git a/llvm/test/CodeGen/AArch64/cpus.ll b/llvm/test/CodeGen/AArch64/cpus.ll
index 363f0a0598e23..a0ee54ce71a1e 100644
--- a/llvm/test/CodeGen/AArch64/cpus.ll
+++ b/llvm/test/CodeGen/AArch64/cpus.ll
@@ -3,6 +3,7 @@
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=carmel 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=olympus 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a35 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a34 2>&1 | FileCheck %s
; RUN: llc < %s -mtriple=arm64-unknown-unknown -mcpu=cortex-a53 2>&1 | FileCheck %s
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 6eb13649a5904..057f42d729c57 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -305,6 +305,10 @@ CPU revision : 0
EXPECT_EQ(sys::detail::getHostCPUNameForARM(CarmelProcCpuInfo), "carmel");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x4e\n"
+ "CPU part : 0x10"),
+ "olympus");
+
// Snapdragon mixed implementer quirk
const std::string Snapdragon865ProcCPUInfo = R"(
processor : 0
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 5d771a1a153f7..dcffc9471705f 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1168,6 +1168,7 @@ INSTANTIATE_TEST_SUITE_P(
AArch64CPUTestParams("fujitsu-monaka", "armv9.3-a"),
AArch64CPUTestParams("carmel", "armv8.2-a"),
AArch64CPUTestParams("grace", "armv9-a"),
+ AArch64CPUTestParams("olympus", "armv9.2-a"),
AArch64CPUTestParams("saphira", "armv8.4-a"),
AArch64CPUTestParams("oryon-1", "armv8.6-a")),
AArch64CPUTestParams::PrintToStringParamName);
@@ -1262,7 +1263,7 @@ INSTANTIATE_TEST_SUITE_P(
AArch64CPUAliasTestParams::PrintToStringParamName);
// Note: number of CPUs includes aliases.
-static constexpr unsigned NumAArch64CPUArchs = 88;
+static constexpr unsigned NumAArch64CPUArchs = 89;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
>From 8663de2ef03c707e06803809d68a5df210044d97 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Mon, 24 Mar 2025 03:53:03 -0700
Subject: [PATCH 2/2] Remove FeatureCrypto and match "0x010" for the part
number
---
.../print-enabled-extensions/aarch64-olympus.c | 1 -
llvm/lib/Target/AArch64/AArch64Processors.td | 13 ++++++-------
llvm/lib/TargetParser/Host.cpp | 1 +
llvm/unittests/TargetParser/Host.cpp | 4 ++++
4 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-olympus.c b/clang/test/Driver/print-enabled-extensions/aarch64-olympus.c
index a37ec4ac6aa7d..64106e583d5f8 100644
--- a/clang/test/Driver/print-enabled-extensions/aarch64-olympus.c
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-olympus.c
@@ -15,7 +15,6 @@
// CHECK-NEXT: FEAT_CHK Enable Armv8.0-A Check Feature Status Extension
// CHECK-NEXT: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions
// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction
-// CHECK-NEXT: FEAT_Crypto Enable cryptographic instructions
// CHECK-NEXT: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
// CHECK-NEXT: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence
// CHECK-NEXT: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index 7527df3860953..e62cbd5be7609 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -884,13 +884,12 @@ def ProcessorFeatures {
FeatureFullFP16, FeatureCRC, FeatureLSE, FeatureRAS, FeatureRDM,
FeatureFPARMv8];
list<SubtargetFeature> Olympus = [HasV9_2aOps, FeatureBRBE, FeatureCCIDX,
- FeatureCHK, FeatureCrypto, FeatureETE,
- FeatureFAMINMAX, FeatureFP16FML,
- FeatureFP8DOT2, FeatureFP8DOT4,
- FeatureFP8FMA, FeatureFPAC, FeatureLS64,
- FeatureLUT, FeatureMEC, FeatureMTE,
- FeaturePerfMon, FeatureRandGen, FeatureSPE,
- FeatureSPE_EEF, FeatureSSBS,
+ FeatureCHK, FeatureETE, FeatureFAMINMAX,
+ FeatureFP16FML, FeatureFP8DOT2,
+ FeatureFP8DOT4, FeatureFP8FMA, FeatureFPAC,
+ FeatureLS64, FeatureLUT, FeatureMEC,
+ FeatureMTE, FeaturePerfMon, FeatureRandGen,
+ FeatureSPE, FeatureSPE_EEF, FeatureSSBS,
FeatureSVEBitPerm, FeatureSVE2SHA3,
FeatureSVE2SM4, FeatureSVEAES];
list<SubtargetFeature> AppleA7 = [HasV8_0aOps, FeatureSHA2, FeatureAES, FeatureFPARMv8,
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 48c07688b3d9c..2350a64346107 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -289,6 +289,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
return StringSwitch<const char *>(Part)
.Case("0x004", "carmel")
.Case("0x10", "olympus")
+ .Case("0x010", "olympus")
.Default("generic");
}
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 057f42d729c57..49cbeaa2739f2 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -309,6 +309,10 @@ CPU revision : 0
"CPU part : 0x10"),
"olympus");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x4e\n"
+ "CPU part : 0x010"),
+ "olympus");
+
// Snapdragon mixed implementer quirk
const std::string Snapdragon865ProcCPUInfo = R"(
processor : 0
More information about the llvm-commits mailing list