[llvm] [win][aarch64] Add support for detecting the Host CPU on Arm64 Windows (PR #151596)
Daniel Paoliello via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 1 16:47:55 PDT 2025
https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/151596
>From f62a90e267045eecbdfd9c499977791b04869a53 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao at microsoft.com>
Date: Wed, 30 Jul 2025 15:36:09 -0700
Subject: [PATCH] [win][aarch64] Add support for detecting the Host CPU on
Arm64 Windows
---
llvm/include/llvm/TargetParser/Host.h | 3 +
llvm/lib/TargetParser/Host.cpp | 195 ++++++++++++++++++++------
llvm/unittests/TargetParser/Host.cpp | 27 ++++
3 files changed, 186 insertions(+), 39 deletions(-)
diff --git a/llvm/include/llvm/TargetParser/Host.h b/llvm/include/llvm/TargetParser/Host.h
index 40a9b6cc13902..b44b9b9a4d069 100644
--- a/llvm/include/llvm/TargetParser/Host.h
+++ b/llvm/include/llvm/TargetParser/Host.h
@@ -13,6 +13,7 @@
#ifndef LLVM_TARGETPARSER_HOST_H
#define LLVM_TARGETPARSER_HOST_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Compiler.h"
#include <string>
@@ -63,6 +64,8 @@ namespace detail {
/// Helper functions to extract HostCPUName from /proc/cpuinfo on linux.
LLVM_ABI StringRef getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent);
LLVM_ABI StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent);
+LLVM_ABI StringRef getHostCPUNameForARM(uint64_t PrimaryCpuInfo,
+ ArrayRef<uint64_t> UniqueCpuInfos);
LLVM_ABI StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent);
LLVM_ABI StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent);
LLVM_ABI StringRef getHostCPUNameForSPARC(StringRef ProcCpuinfoContent);
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 7e09d30bf3d55..f11c4b44c266a 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/TargetParser/Host.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -167,35 +168,10 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
.Default(generic);
}
-StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
- // The cpuid register on arm is not accessible from user space. On Linux,
- // it is exposed through the /proc/cpuinfo file.
-
- // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
- // in all cases.
- SmallVector<StringRef, 32> Lines;
- ProcCpuinfoContent.split(Lines, '\n');
-
- // Look for the CPU implementer and hardware lines, and store the CPU part
- // numbers found.
- StringRef Implementer;
- StringRef Hardware;
- SmallVector<StringRef, 32> Parts;
- for (StringRef Line : Lines) {
- if (Line.consume_front("CPU implementer"))
- Implementer = Line.ltrim("\t :");
- else if (Line.consume_front("Hardware"))
- Hardware = Line.ltrim("\t :");
- else if (Line.consume_front("CPU part"))
- Parts.emplace_back(Line.ltrim("\t :"));
- }
-
- // Last `Part' seen, in case we don't analyse all `Parts' parsed.
- StringRef Part = Parts.empty() ? StringRef() : Parts.back();
-
- // Remove duplicate `Parts'.
- llvm::sort(Parts);
- Parts.erase(llvm::unique(Parts), Parts.end());
+StringRef
+getHostCPUNameForARMFromComponents(StringRef Implementer, StringRef Hardware,
+ StringRef Part, ArrayRef<StringRef> Parts,
+ function_ref<unsigned()> GetVariant) {
auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) {
if (Parts.size() == 2)
@@ -343,21 +319,17 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
// The Exynos chips have a convoluted ID scheme that doesn't seem to follow
// any predictive pattern across variants and parts.
- unsigned Variant = 0, Part = 0;
// Look for the CPU variant line, whose value is a 1 digit hexadecimal
// number, corresponding to the Variant bits in the CP15/C0 register.
- for (auto I : Lines)
- if (I.consume_front("CPU variant"))
- I.ltrim("\t :").getAsInteger(0, Variant);
+ unsigned Variant = GetVariant();
- // Look for the CPU part line, whose value is a 3 digit hexadecimal
- // number, corresponding to the PartNum bits in the CP15/C0 register.
- for (auto I : Lines)
- if (I.consume_front("CPU part"))
- I.ltrim("\t :").getAsInteger(0, Part);
+ // Convert the CPU part line, whose value is a 3 digit hexadecimal number,
+ // corresponding to the PartNum bits in the CP15/C0 register.
+ unsigned PartAsInt;
+ Part.getAsInteger(0, PartAsInt);
- unsigned Exynos = (Variant << 12) | Part;
+ unsigned Exynos = (Variant << 12) | PartAsInt;
switch (Exynos) {
default:
// Default by falling through to Exynos M3.
@@ -416,6 +388,86 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
return "generic";
}
+StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
+ // The cpuid register on arm is not accessible from user space. On Linux,
+ // it is exposed through the /proc/cpuinfo file.
+
+ // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
+ // in all cases.
+ SmallVector<StringRef, 32> Lines;
+ ProcCpuinfoContent.split(Lines, '\n');
+
+ // Look for the CPU implementer and hardware lines, and store the CPU part
+ // numbers found.
+ StringRef Implementer;
+ StringRef Hardware;
+ SmallVector<StringRef, 32> Parts;
+ for (StringRef Line : Lines) {
+ if (Line.consume_front("CPU implementer"))
+ Implementer = Line.ltrim("\t :");
+ else if (Line.consume_front("Hardware"))
+ Hardware = Line.ltrim("\t :");
+ else if (Line.consume_front("CPU part"))
+ Parts.emplace_back(Line.ltrim("\t :"));
+ }
+
+ // Last `Part' seen, in case we don't analyse all `Parts' parsed.
+ StringRef Part = Parts.empty() ? StringRef() : Parts.back();
+
+ // Remove duplicate `Parts'.
+ llvm::sort(Parts);
+ Parts.erase(llvm::unique(Parts), Parts.end());
+
+ auto GetVariant = [&]() {
+ unsigned Variant = 0;
+ for (auto I : Lines)
+ if (I.consume_front("CPU variant"))
+ I.ltrim("\t :").getAsInteger(0, Variant);
+ return Variant;
+ };
+
+ return getHostCPUNameForARMFromComponents(Implementer, Hardware, Part, Parts,
+ GetVariant);
+}
+
+StringRef sys::detail::getHostCPUNameForARM(uint64_t PrimaryCpuInfo,
+ ArrayRef<uint64_t> UniqueCpuInfos) {
+ // On Windows, the registry provides cached copied of the MIDR_EL1 register.
+ union MIDR_EL1 {
+ uint64_t Raw;
+ struct _Components {
+ uint64_t Revision : 4;
+ uint64_t Partnum : 12;
+ uint64_t Architecture : 4;
+ uint64_t Variant : 4;
+ uint64_t Implementer : 8;
+ uint64_t Reserved : 32;
+ } Components;
+ };
+
+ SmallVector<std::string> PartsHolder;
+ PartsHolder.reserve(UniqueCpuInfos.size());
+ for (auto Info : UniqueCpuInfos)
+ PartsHolder.push_back("0x" + utohexstr(MIDR_EL1{Info}.Components.Partnum,
+ /*LowerCase*/ true,
+ /*Width*/ 3));
+
+ SmallVector<StringRef> Parts;
+ Parts.reserve(PartsHolder.size());
+ for (const auto &Part : PartsHolder)
+ Parts.push_back(Part);
+
+ return getHostCPUNameForARMFromComponents(
+ "0x" + utohexstr(MIDR_EL1{PrimaryCpuInfo}.Components.Implementer,
+ /*LowerCase*/ true,
+ /*Width*/ 2),
+ /*Hardware*/ "",
+ "0x" + utohexstr(MIDR_EL1{PrimaryCpuInfo}.Components.Partnum,
+ /*LowerCase*/ true,
+ /*Width*/ 3),
+ Parts, [=]() { return MIDR_EL1{PrimaryCpuInfo}.Components.Variant; });
+}
+
namespace {
StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
switch (Id) {
@@ -1450,6 +1502,71 @@ StringRef sys::getHostCPUName() {
return "generic";
}
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)
+
+StringRef sys::getHostCPUName() {
+ constexpr char CentralProcessorKeyName[] =
+ "HARDWARE\\DESCRIPTION\\System\\CentralProcessor";
+ // Sub keys names are simple numbers ("0", "1", etc.) so 10 chars should be
+ // enough for the slash and name.
+ constexpr size_t SubKeyNameMaxSize = ARRAYSIZE(CentralProcessorKeyName) + 10;
+
+ SmallVector<uint64_t> Values;
+ uint64_t PrimaryCpuInfo;
+ char PrimaryPartKeyName[SubKeyNameMaxSize];
+ HKEY CentralProcessorKey;
+ if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, CentralProcessorKeyName, 0, KEY_READ,
+ &CentralProcessorKey) == ERROR_SUCCESS) {
+ for (unsigned Index = 0; Index < UINT32_MAX; ++Index) {
+ char SubKeyName[SubKeyNameMaxSize];
+ DWORD SubKeySize = SubKeyNameMaxSize;
+ HKEY SubKey;
+ if ((RegEnumKeyExA(CentralProcessorKey, Index, SubKeyName, &SubKeySize,
+ nullptr, nullptr, nullptr,
+ nullptr) == ERROR_SUCCESS) &&
+ (RegOpenKeyExA(CentralProcessorKey, SubKeyName, 0, KEY_READ,
+ &SubKey) == ERROR_SUCCESS)) {
+ // The "CP 4000" registry key contains a cached copy of the MIDR_EL1
+ // register.
+ uint64_t RegValue;
+ DWORD ActualType;
+ DWORD RegValueSize = sizeof(RegValue);
+ if ((RegQueryValueExA(SubKey, "CP 4000", nullptr, &ActualType,
+ (PBYTE)&RegValue,
+ &RegValueSize) == ERROR_SUCCESS) &&
+ (ActualType == REG_QWORD) && RegValueSize == sizeof(RegValue)) {
+ // Assume that the part with the "lowest" reg key name is the primary
+ // part. Win32 makes no guarantees about the order of sub keys, so we
+ // have to check the name.
+ if (Values.empty() ||
+ ::memcmp(SubKeyName, PrimaryPartKeyName, SubKeyNameMaxSize) < 0) {
+ PrimaryCpuInfo = RegValue;
+ ::memcpy(PrimaryPartKeyName, SubKeyName, SubKeySize + 1);
+ }
+ if (!llvm::is_contained(Values, RegValue)) {
+ Values.push_back(RegValue);
+ }
+ }
+ RegCloseKey(SubKey);
+ } else {
+ // No more sub keys.
+ break;
+ }
+ }
+ RegCloseKey(CentralProcessorKey);
+ }
+
+ if (Values.empty()) {
+ return "generic";
+ }
+
+ // Win32 makes no guarantees about the order of sub keys, so sort to ensure
+ // reproducibility.
+ llvm::sort(Values);
+
+ return detail::getHostCPUNameForARM(PrimaryCpuInfo, Values);
+}
+
#elif defined(__APPLE__) && defined(__powerpc__)
StringRef sys::getHostCPUName() {
host_basic_info_data_t hostInfo;
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 0a9ac9bb0596d..be8548ebf8551 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -59,16 +59,28 @@ Serial : 0000000000000000
EXPECT_EQ(sys::detail::getHostCPUNameForARM(CortexA9ProcCpuinfo),
"cortex-a9");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+ 0x4100c090, ArrayRef<uint64_t>{0x4100c090, 0x4100c090}),
+ "cortex-a9");
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
"CPU part : 0xc0f"),
"cortex-a15");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x4100c0f0,
+ ArrayRef<uint64_t>{0x4100c0f0}),
+ "cortex-a15");
// Verify that both CPU implementer and CPU part are checked:
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x40\n"
"CPU part : 0xc0f"),
"generic");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x4000c0f0,
+ ArrayRef<uint64_t>{0x4000c0f0}),
+ "generic");
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
"CPU part : 0x06f"),
"krait");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x510006f0,
+ ArrayRef<uint64_t>{0x510006f0}),
+ "krait");
}
TEST(getLinuxHostCPUName, AArch64) {
@@ -126,10 +138,16 @@ TEST(getLinuxHostCPUName, AArch64) {
"CPU part : 0xd85\n"
"CPU part : 0xd87"),
"cortex-x925");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+ 0x4100d850, ArrayRef<uint64_t>{0x4100d850, 0x4100d870}),
+ "cortex-x925");
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
"CPU part : 0xd87\n"
"CPU part : 0xd85"),
"cortex-x925");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+ 0x4100d870, ArrayRef<uint64_t>{0x4100d870, 0x4100d850}),
+ "cortex-x925");
EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
"CPU part : 0xc00"),
"falkor");
@@ -200,16 +218,25 @@ CPU architecture: 8
"CPU variant : 0xc\n"
"CPU part : 0xafe"),
"exynos-m3");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+ 0x53c0afe0, ArrayRef<uint64_t>{0x53c0afe0, 0x5300d050}),
+ "exynos-m3");
// Verify Exynos M3.
EXPECT_EQ(sys::detail::getHostCPUNameForARM(ExynosProcCpuInfo +
"CPU variant : 0x1\n"
"CPU part : 0x002"),
"exynos-m3");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+ 0x53100020, ArrayRef<uint64_t>{0x53100020, 0x5300d050}),
+ "exynos-m3");
// Verify Exynos M4.
EXPECT_EQ(sys::detail::getHostCPUNameForARM(ExynosProcCpuInfo +
"CPU variant : 0x1\n"
"CPU part : 0x003"),
"exynos-m4");
+ EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+ 0x53100030, ArrayRef<uint64_t>{0x53100030, 0x5300d050}),
+ "exynos-m4");
const std::string ThunderX2T99ProcCpuInfo = R"(
processor : 0
More information about the llvm-commits
mailing list