[clang] [llvm] [AArch64] Add support for -mcpu=gb10. (PR #146515)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 4 03:33:50 PDT 2025
https://github.com/rj-jesus updated https://github.com/llvm/llvm-project/pull/146515
>From 7857d5536c4145bc3b2c797222b25f3533db518f Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Wed, 2 Apr 2025 10:37:59 -0700
Subject: [PATCH 1/3] [AArch64] Add support for -mcpu=gb10.
This patch adds support for -mcpu=gb10 (NVIDIA GB10). This is a
big.LITTLE cluster of Cortex-X925 and Cortex-A725 cores. The appropriate
MIDR numbers are added to detect them in -mcpu=native.
We did not add an -mcpu=cortex-x925.cortex-a725 option because GB10 does
include the crypto instructions which we want on by default, and the
current convention is to not enable such extensions for Arm Cortex cores
in -mcpu where they are optional in the IP.
---
.../print-enabled-extensions/aarch64-gb10.c | 69 +++++++++++++++++++
.../Misc/target-invalid-cpu-note/aarch64.c | 1 +
llvm/lib/Target/AArch64/AArch64Processors.td | 3 +
llvm/lib/TargetParser/Host.cpp | 21 +++++-
llvm/unittests/TargetParser/Host.cpp | 8 +++
.../TargetParser/TargetParserTest.cpp | 3 +-
6 files changed, 102 insertions(+), 3 deletions(-)
create mode 100644 clang/test/Driver/print-enabled-extensions/aarch64-gb10.c
diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-gb10.c b/clang/test/Driver/print-enabled-extensions/aarch64-gb10.c
new file mode 100644
index 0000000000000..589f7e3e5ee4e
--- /dev/null
+++ b/clang/test/Driver/print-enabled-extensions/aarch64-gb10.c
@@ -0,0 +1,69 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang --target=aarch64 --print-enabled-extensions -mcpu=gb10 | FileCheck --strict-whitespace --implicit-check-not=FEAT_ %s
+
+// CHECK: Extensions enabled for the given AArch64 target
+// CHECK-EMPTY:
+// CHECK-NEXT: Architecture Feature(s) Description
+// CHECK-NEXT: FEAT_AES, FEAT_PMULL Enable AES support
+// CHECK-NEXT: FEAT_AMUv1 Enable Armv8.4-A Activity Monitors extension
+// CHECK-NEXT: FEAT_AMUv1p1 Enable Armv8.6-A Activity Monitors Virtualization support
+// CHECK-NEXT: FEAT_AdvSIMD Enable Advanced SIMD instructions
+// CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension
+// CHECK-NEXT: FEAT_BTI Enable Branch Target Identification
+// CHECK-NEXT: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets
+// CHECK-NEXT: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions
+// CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction
+// CHECK-NEXT: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions
+// CHECK-NEXT: FEAT_DPB Enable Armv8.2-A data Cache Clean to Point of Persistence
+// CHECK-NEXT: FEAT_DPB2 Enable Armv8.5-A Cache Clean to Point of Deep Persistence
+// CHECK-NEXT: FEAT_DotProd Enable dot product support
+// CHECK-NEXT: FEAT_ECV Enable enhanced counter virtualization extension
+// CHECK-NEXT: FEAT_ETE Enable Embedded Trace Extension
+// CHECK-NEXT: FEAT_FCMA Enable Armv8.3-A Floating-point complex number support
+// CHECK-NEXT: FEAT_FGT Enable fine grained virtualization traps extension
+// CHECK-NEXT: FEAT_FHM Enable FP16 FML instructions
+// CHECK-NEXT: FEAT_FP Enable Armv8.0-A Floating Point Extensions
+// CHECK-NEXT: FEAT_FP16 Enable half-precision floating-point data processing
+// CHECK-NEXT: FEAT_FPAC Enable Armv8.3-A Pointer Authentication Faulting enhancement
+// CHECK-NEXT: FEAT_FRINTTS Enable FRInt[32|64][Z|X] instructions that round a floating-point number to an integer (in FP format) forcing it to fit into a 32- or 64-bit int
+// CHECK-NEXT: FEAT_FlagM Enable Armv8.4-A Flag Manipulation instructions
+// CHECK-NEXT: FEAT_FlagM2 Enable alternative NZCV format for floating point comparisons
+// CHECK-NEXT: FEAT_HCX Enable Armv8.7-A HCRX_EL2 system register
+// CHECK-NEXT: FEAT_I8MM Enable Matrix Multiply Int8 Extension
+// CHECK-NEXT: FEAT_JSCVT Enable Armv8.3-A JavaScript FP conversion instructions
+// CHECK-NEXT: FEAT_LOR Enable Armv8.1-A Limited Ordering Regions extension
+// CHECK-NEXT: FEAT_LRCPC Enable support for RCPC extension
+// CHECK-NEXT: FEAT_LRCPC2 Enable Armv8.4-A RCPC instructions with Immediate Offsets
+// CHECK-NEXT: FEAT_LSE Enable Armv8.1-A Large System Extension (LSE) atomic instructions
+// CHECK-NEXT: FEAT_LSE2 Enable Armv8.4-A Large System Extension 2 (LSE2) atomicity rules
+// CHECK-NEXT: FEAT_MPAM Enable Armv8.4-A Memory system Partitioning and Monitoring extension
+// CHECK-NEXT: FEAT_MTE, FEAT_MTE2 Enable Memory Tagging Extension
+// CHECK-NEXT: FEAT_NV, FEAT_NV2 Enable Armv8.4-A Nested Virtualization Enchancement
+// CHECK-NEXT: FEAT_PAN Enable Armv8.1-A Privileged Access-Never extension
+// CHECK-NEXT: FEAT_PAN2 Enable Armv8.2-A PAN s1e1R and s1e1W Variants
+// CHECK-NEXT: FEAT_PAuth Enable Armv8.3-A Pointer Authentication extension
+// CHECK-NEXT: FEAT_PMUv3 Enable Armv8.0-A PMUv3 Performance Monitors extension
+// CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions
+// CHECK-NEXT: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions
+// CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier
+// CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension
+// CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support
+// CHECK-NEXT: FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
+// CHECK-NEXT: FEAT_SM4, FEAT_SM3 Enable SM3 and SM4 support
+// CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension
+// CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions
+// CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension
+// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit
+// CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions
+// CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions
+// CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable SVE AES and quadword SVE polynomial multiply instructions
+// CHECK-NEXT: FEAT_SVE_BitPerm Enable bit permutation SVE2 instructions
+// CHECK-NEXT: FEAT_SVE_SHA3 Enable SVE SHA3 instructions
+// CHECK-NEXT: FEAT_SVE_SM4 Enable SM4 SVE2 instructions
+// CHECK-NEXT: FEAT_TLBIOS, FEAT_TLBIRANGE Enable Armv8.4-A TLB Range and Maintenance instructions
+// CHECK-NEXT: FEAT_TRBE Enable Trace Buffer Extension
+// CHECK-NEXT: FEAT_TRF Enable Armv8.4-A Trace extension
+// CHECK-NEXT: FEAT_UAO Enable Armv8.2-A UAO PState
+// CHECK-NEXT: FEAT_VHE Enable Armv8.1-A Virtual Host extension
+// CHECK-NEXT: FEAT_WFxT Enable Armv8.7-A WFET and WFIT instruction
+// CHECK-NEXT: FEAT_XS Enable Armv8.7-A limited-TLB-maintenance instruction
diff --git a/clang/test/Misc/target-invalid-cpu-note/aarch64.c b/clang/test/Misc/target-invalid-cpu-note/aarch64.c
index 0a5c485e896be..0346ab2bb6b13 100644
--- a/clang/test/Misc/target-invalid-cpu-note/aarch64.c
+++ b/clang/test/Misc/target-invalid-cpu-note/aarch64.c
@@ -75,6 +75,7 @@
// CHECK-SAME: {{^}}, exynos-m5
// CHECK-SAME: {{^}}, falkor
// CHECK-SAME: {{^}}, fujitsu-monaka
+// CHECK-SAME: {{^}}, gb10
// CHECK-SAME: {{^}}, generic
// CHECK-SAME: {{^}}, grace
// CHECK-SAME: {{^}}, kryo
diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td
index dcccde4a4d666..4484951821337 100644
--- a/llvm/lib/Target/AArch64/AArch64Processors.td
+++ b/llvm/lib/Target/AArch64/AArch64Processors.td
@@ -1117,6 +1117,7 @@ def ProcessorFeatures {
FeatureDotProd, FeatureFPARMv8, FeatureMatMulInt8,
FeatureSSBS, FeatureCCIDX,
FeatureJS, FeatureLSE, FeatureRAS, FeatureRCPC, FeatureRDM];
+ list<SubtargetFeature> GB10 = !listconcat(X925, [FeatureSVEAES, FeatureSVESHA3, FeatureSVE2SM4]);
list<SubtargetFeature> Grace = !listconcat(NeoverseV2, [FeatureSVE2SM4, FeatureSVEAES, FeatureSVESHA3]);
// ETE and TRBE are future architecture extensions. We temporarily enable them
@@ -1203,6 +1204,8 @@ def : ProcessorModel<"cortex-x4", NeoverseV2Model, ProcessorFeatures.X4,
[TuneX4]>;
def : ProcessorModel<"cortex-x925", NeoverseV2Model, ProcessorFeatures.X925,
[TuneX925]>;
+def : ProcessorModel<"gb10", NeoverseV2Model, ProcessorFeatures.GB10,
+ [TuneX925]>;
def : ProcessorModel<"grace", NeoverseV2Model, ProcessorFeatures.Grace,
[TuneNeoverseV2]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 5957e1befe2da..97f6de4be8cb2 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -179,22 +179,39 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
// Look for the CPU implementer line.
StringRef Implementer;
StringRef Hardware;
- StringRef Part;
+ SmallVector<StringRef, 32> Parts;
for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
if (Lines[I].starts_with("CPU implementer"))
Implementer = Lines[I].substr(15).ltrim("\t :");
if (Lines[I].starts_with("Hardware"))
Hardware = Lines[I].substr(8).ltrim("\t :");
if (Lines[I].starts_with("CPU part"))
- Part = Lines[I].substr(8).ltrim("\t :");
+ Parts.emplace_back(Lines[I].substr(8).ltrim("\t :"));
}
+ // Last `Part' seen, in case we don't analyse all `Parts' parsed.
+ StringRef Part = Parts.empty() ? StringRef() : Parts.back();
+
+ // Remove duplicate `Parts'.
+ llvm::sort(Parts);
+ Parts.erase(llvm::unique(Parts), Parts.end());
+
+ auto MatchBL = [](auto const &Parts, StringRef Big, StringRef Little) {
+ if (Parts.size() == 2)
+ return (Parts[0] == Big && Parts[1] == Little) ||
+ (Parts[1] == Big && Parts[0] == Little);
+ return false;
+ };
+
if (Implementer == "0x41") { // ARM Ltd.
// MSM8992/8994 may give cpu part for the core that the kernel is running on,
// which is undeterministic and wrong. Always return cortex-a53 for these SoC.
if (Hardware.ends_with("MSM8994") || Hardware.ends_with("MSM8996"))
return "cortex-a53";
+ // Detect big.LITTLE systems.
+ if (MatchBL(Parts, "0xd85", "0xd87"))
+ return "gb10";
// The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
// values correspond to the "Part number" in the CP15/c0 register. The
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index c13d33e32c6d9..898a716c28d48 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -149,6 +149,14 @@ TEST(getLinuxHostCPUName, AArch64) {
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x61\n"
"CPU part : 0x039"),
"apple-m2");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
+ "CPU part : 0xd85\n"
+ "CPU part : 0xd87"),
+ "gb10");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
+ "CPU part : 0xd87\n"
+ "CPU part : 0xd85"),
+ "gb10");
// MSM8992/4 weirdness
StringRef MSM8992ProcCpuInfo = R"(
diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp
index 97ee8dd1cb67b..1d7ad8ac075f4 100644
--- a/llvm/unittests/TargetParser/TargetParserTest.cpp
+++ b/llvm/unittests/TargetParser/TargetParserTest.cpp
@@ -1164,6 +1164,7 @@ INSTANTIATE_TEST_SUITE_P(
AArch64CPUTestParams("a64fx", "armv8.2-a"),
AArch64CPUTestParams("fujitsu-monaka", "armv9.3-a"),
AArch64CPUTestParams("carmel", "armv8.2-a"),
+ AArch64CPUTestParams("gb10", "armv9.2-a"),
AArch64CPUTestParams("grace", "armv9-a"),
AArch64CPUTestParams("olympus", "armv9.2-a"),
AArch64CPUTestParams("saphira", "armv8.4-a"),
@@ -1260,7 +1261,7 @@ INSTANTIATE_TEST_SUITE_P(
AArch64CPUAliasTestParams::PrintToStringParamName);
// Note: number of CPUs includes aliases.
-static constexpr unsigned NumAArch64CPUArchs = 90;
+static constexpr unsigned NumAArch64CPUArchs = 91;
TEST(TargetParserTest, testAArch64CPUArchList) {
SmallVector<StringRef, NumAArch64CPUArchs> List;
>From b603412e3f8bba3e97d1fb38cc7cc1ccf945fcde Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Tue, 1 Jul 2025 08:50:35 -0700
Subject: [PATCH 2/3] Rename MatchBL to MatchBigLittle.
---
llvm/lib/TargetParser/Host.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 97f6de4be8cb2..032a2ce240403 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -176,7 +176,8 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
SmallVector<StringRef, 32> Lines;
ProcCpuinfoContent.split(Lines, '\n');
- // Look for the CPU implementer line.
+ // Look for the CPU implementer and hardware lines, and store the CPU part
+ // numbers found.
StringRef Implementer;
StringRef Hardware;
SmallVector<StringRef, 32> Parts;
@@ -196,7 +197,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
llvm::sort(Parts);
Parts.erase(llvm::unique(Parts), Parts.end());
- auto MatchBL = [](auto const &Parts, StringRef Big, StringRef Little) {
+ auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) {
if (Parts.size() == 2)
return (Parts[0] == Big && Parts[1] == Little) ||
(Parts[1] == Big && Parts[0] == Little);
@@ -210,7 +211,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
return "cortex-a53";
// Detect big.LITTLE systems.
- if (MatchBL(Parts, "0xd85", "0xd87"))
+ if (MatchBigLittle(Parts, "0xd85", "0xd87"))
return "gb10";
// The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
>From 990a9d1ddc59b449b60379b0e4f83d139aa0ef14 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Thu, 3 Jul 2025 03:53:31 -0700
Subject: [PATCH 3/3] Pick cortex-x925 in native handling.
---
llvm/lib/TargetParser/Host.cpp | 2 +-
llvm/unittests/TargetParser/Host.cpp | 16 ++++++++--------
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 032a2ce240403..e7c4e19eeb072 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -212,7 +212,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
// Detect big.LITTLE systems.
if (MatchBigLittle(Parts, "0xd85", "0xd87"))
- return "gb10";
+ return "cortex-x925";
// The CPU part is a 3 digit hexadecimal number with a 0x prefix. The
// values correspond to the "Part number" in the CP15/c0 register. The
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 898a716c28d48..0a9ac9bb0596d 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -122,6 +122,14 @@ TEST(getLinuxHostCPUName, AArch64) {
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
"CPU part : 0xd48"),
"cortex-x2");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
+ "CPU part : 0xd85\n"
+ "CPU part : 0xd87"),
+ "cortex-x925");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
+ "CPU part : 0xd87\n"
+ "CPU part : 0xd85"),
+ "cortex-x925");
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
"CPU part : 0xc00"),
"falkor");
@@ -149,14 +157,6 @@ TEST(getLinuxHostCPUName, AArch64) {
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x61\n"
"CPU part : 0x039"),
"apple-m2");
- EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
- "CPU part : 0xd85\n"
- "CPU part : 0xd87"),
- "gb10");
- EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
- "CPU part : 0xd87\n"
- "CPU part : 0xd85"),
- "gb10");
// MSM8992/4 weirdness
StringRef MSM8992ProcCpuInfo = R"(
More information about the llvm-commits
mailing list