[llvm] [win][aarch64] Add support for detecting the Host CPU on Arm64 Windows (PR #151596)

Daniel Paoliello via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 1 16:47:55 PDT 2025


https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/151596

>From f62a90e267045eecbdfd9c499977791b04869a53 Mon Sep 17 00:00:00 2001
From: Daniel Paoliello <danpao at microsoft.com>
Date: Wed, 30 Jul 2025 15:36:09 -0700
Subject: [PATCH] [win][aarch64] Add support for detecting the Host CPU on
 Arm64 Windows

---
 llvm/include/llvm/TargetParser/Host.h |   3 +
 llvm/lib/TargetParser/Host.cpp        | 195 ++++++++++++++++++++------
 llvm/unittests/TargetParser/Host.cpp  |  27 ++++
 3 files changed, 186 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/TargetParser/Host.h b/llvm/include/llvm/TargetParser/Host.h
index 40a9b6cc13902..b44b9b9a4d069 100644
--- a/llvm/include/llvm/TargetParser/Host.h
+++ b/llvm/include/llvm/TargetParser/Host.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_TARGETPARSER_HOST_H
 #define LLVM_TARGETPARSER_HOST_H
 
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/Compiler.h"
 #include <string>
 
@@ -63,6 +64,8 @@ namespace detail {
 /// Helper functions to extract HostCPUName from /proc/cpuinfo on linux.
 LLVM_ABI StringRef getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent);
 LLVM_ABI StringRef getHostCPUNameForARM(StringRef ProcCpuinfoContent);
+LLVM_ABI StringRef getHostCPUNameForARM(uint64_t PrimaryCpuInfo,
+                                        ArrayRef<uint64_t> UniqueCpuInfos);
 LLVM_ABI StringRef getHostCPUNameForS390x(StringRef ProcCpuinfoContent);
 LLVM_ABI StringRef getHostCPUNameForRISCV(StringRef ProcCpuinfoContent);
 LLVM_ABI StringRef getHostCPUNameForSPARC(StringRef ProcCpuinfoContent);
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 7e09d30bf3d55..f11c4b44c266a 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/TargetParser/Host.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
@@ -167,35 +168,10 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
       .Default(generic);
 }
 
-StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
-  // The cpuid register on arm is not accessible from user space. On Linux,
-  // it is exposed through the /proc/cpuinfo file.
-
-  // Read 32 lines from /proc/cpuinfo, which should contain the CPU part line
-  // in all cases.
-  SmallVector<StringRef, 32> Lines;
-  ProcCpuinfoContent.split(Lines, '\n');
-
-  // Look for the CPU implementer and hardware lines, and store the CPU part
-  // numbers found.
-  StringRef Implementer;
-  StringRef Hardware;
-  SmallVector<StringRef, 32> Parts;
-  for (StringRef Line : Lines) {
-    if (Line.consume_front("CPU implementer"))
-      Implementer = Line.ltrim("\t :");
-    else if (Line.consume_front("Hardware"))
-      Hardware = Line.ltrim("\t :");
-    else if (Line.consume_front("CPU part"))
-      Parts.emplace_back(Line.ltrim("\t :"));
-  }
-
-  // Last `Part' seen, in case we don't analyse all `Parts' parsed.
-  StringRef Part = Parts.empty() ? StringRef() : Parts.back();
-
-  // Remove duplicate `Parts'.
-  llvm::sort(Parts);
-  Parts.erase(llvm::unique(Parts), Parts.end());
+StringRef
+getHostCPUNameForARMFromComponents(StringRef Implementer, StringRef Hardware,
+                                   StringRef Part, ArrayRef<StringRef> Parts,
+                                   function_ref<unsigned()> GetVariant) {
 
   auto MatchBigLittle = [](auto const &Parts, StringRef Big, StringRef Little) {
     if (Parts.size() == 2)
@@ -343,21 +319,17 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
   if (Implementer == "0x53") { // Samsung Electronics Co., Ltd.
     // The Exynos chips have a convoluted ID scheme that doesn't seem to follow
     // any predictive pattern across variants and parts.
-    unsigned Variant = 0, Part = 0;
 
     // Look for the CPU variant line, whose value is a 1 digit hexadecimal
     // number, corresponding to the Variant bits in the CP15/C0 register.
-    for (auto I : Lines)
-      if (I.consume_front("CPU variant"))
-        I.ltrim("\t :").getAsInteger(0, Variant);
+    unsigned Variant = GetVariant();
 
-    // Look for the CPU part line, whose value is a 3 digit hexadecimal
-    // number, corresponding to the PartNum bits in the CP15/C0 register.
-    for (auto I : Lines)
-      if (I.consume_front("CPU part"))
-        I.ltrim("\t :").getAsInteger(0, Part);
+    // Convert the CPU part line, whose value is a 3 digit hexadecimal number,
+    // corresponding to the PartNum bits in the CP15/C0 register.
+    unsigned PartAsInt;
+    Part.getAsInteger(0, PartAsInt);
 
-    unsigned Exynos = (Variant << 12) | Part;
+    unsigned Exynos = (Variant << 12) | PartAsInt;
     switch (Exynos) {
     default:
       // Default by falling through to Exynos M3.
@@ -416,6 +388,86 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
   return "generic";
 }
 
+StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) {
+  // The cpuid register on arm is not accessible from user space. On Linux,
+  // it is exposed through the /proc/cpuinfo file.
+
+  // Split the content into lines. No split limit is passed here; 32 is the
+  // vector's inline size (NOTE(review): not a line cap - confirm).
+  SmallVector<StringRef, 32> Lines;
+  ProcCpuinfoContent.split(Lines, '\n');
+
+  // Look for the CPU implementer and hardware lines, and store the CPU part
+  // numbers found.
+  StringRef Implementer;
+  StringRef Hardware;
+  SmallVector<StringRef, 32> Parts;
+  for (StringRef Line : Lines) {
+    if (Line.consume_front("CPU implementer"))
+      Implementer = Line.ltrim("\t :");
+    else if (Line.consume_front("Hardware"))
+      Hardware = Line.ltrim("\t :");
+    else if (Line.consume_front("CPU part"))
+      Parts.emplace_back(Line.ltrim("\t :"));
+  }
+
+  // Last `Part' seen, in case we don't analyse all `Parts' parsed.
+  StringRef Part = Parts.empty() ? StringRef() : Parts.back();
+
+  // Remove duplicate `Parts'.
+  llvm::sort(Parts);
+  Parts.erase(llvm::unique(Parts), Parts.end());
+
+  auto GetVariant = [&]() {
+    unsigned Variant = 0;
+    for (auto I : Lines)
+      if (I.consume_front("CPU variant"))
+        I.ltrim("\t :").getAsInteger(0, Variant);
+    return Variant;
+  };
+
+  return getHostCPUNameForARMFromComponents(Implementer, Hardware, Part, Parts,
+                                            GetVariant);
+}
+
+StringRef sys::detail::getHostCPUNameForARM(uint64_t PrimaryCpuInfo,
+                                            ArrayRef<uint64_t> UniqueCpuInfos) {
+  // On Windows, the registry provides cached copies of the MIDR_EL1 register.
+  // PrimaryCpuInfo is the MIDR_EL1 value of the primary core; UniqueCpuInfos
+  // holds the distinct MIDR_EL1 values seen across all cores.
+  //
+  // Decode the fields with explicit shifts and masks rather than a union of
+  // bit-fields: bit-field allocation order is implementation-defined, so the
+  // union would decode the wrong bits on hosts that allocate from the MSB.
+  // MIDR_EL1 layout: Revision [3:0], PartNum [15:4], Architecture [19:16],
+  // Variant [23:20], Implementer [31:24].
+  auto GetPartNum = [](uint64_t Midr) { return (Midr >> 4) & 0xfff; };
+  auto GetVariant = [](uint64_t Midr) { return (Midr >> 20) & 0xf; };
+  auto GetImplementer = [](uint64_t Midr) { return (Midr >> 24) & 0xff; };
+  auto PartToString = [&](uint64_t Midr) {
+    return "0x" + utohexstr(GetPartNum(Midr), /*LowerCase*/ true, /*Width*/ 3);
+  };
+
+  // PartsHolder owns the formatted part numbers; Parts only refers to them.
+  SmallVector<std::string> PartsHolder;
+  PartsHolder.reserve(UniqueCpuInfos.size());
+  for (uint64_t Info : UniqueCpuInfos)
+    PartsHolder.push_back(PartToString(Info));
+
+  SmallVector<StringRef> Parts;
+  Parts.reserve(PartsHolder.size());
+  for (const auto &Part : PartsHolder)
+    Parts.push_back(Part);
+
+  const std::string Implementer =
+      "0x" + utohexstr(GetImplementer(PrimaryCpuInfo), /*LowerCase*/ true,
+                       /*Width*/ 2);
+  const std::string PrimaryPart = PartToString(PrimaryCpuInfo);
+  return getHostCPUNameForARMFromComponents(
+      Implementer, /*Hardware*/ "", PrimaryPart, Parts,
+      [=]() -> unsigned { return GetVariant(PrimaryCpuInfo); });
+}
+
 namespace {
 StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
   switch (Id) {
@@ -1450,6 +1502,71 @@ StringRef sys::getHostCPUName() {
   return "generic";
 }
 
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)
+
+StringRef sys::getHostCPUName() {
+  // Identify the host CPU from the MIDR_EL1 copies cached in the registry.
+  constexpr char CentralProcessorKeyName[] =
+      "HARDWARE\\DESCRIPTION\\System\\CentralProcessor";
+  // Sub key names are simple numbers ("0", "1", etc.) so 10 chars should be
+  // enough for the slash and name.
+  constexpr size_t SubKeyNameMaxSize = ARRAYSIZE(CentralProcessorKeyName) + 10;
+
+  SmallVector<uint64_t> Values;
+  uint64_t PrimaryCpuInfo = 0;
+  char PrimaryPartKeyName[SubKeyNameMaxSize];
+  DWORD PrimaryPartKeyNameSize = 0;
+  HKEY CentralProcessorKey;
+  if (RegOpenKeyExA(HKEY_LOCAL_MACHINE, CentralProcessorKeyName, 0, KEY_READ,
+                    &CentralProcessorKey) == ERROR_SUCCESS) {
+    for (unsigned Index = 0; Index < UINT32_MAX; ++Index) {
+      char SubKeyName[SubKeyNameMaxSize];
+      DWORD SubKeySize = SubKeyNameMaxSize;
+      HKEY SubKey;
+      if ((RegEnumKeyExA(CentralProcessorKey, Index, SubKeyName, &SubKeySize,
+                         nullptr, nullptr, nullptr,
+                         nullptr) == ERROR_SUCCESS) &&
+          (RegOpenKeyExA(CentralProcessorKey, SubKeyName, 0, KEY_READ,
+                         &SubKey) == ERROR_SUCCESS)) {
+        // The "CP 4000" value holds a cached copy of the MIDR_EL1 register.
+        uint64_t RegValue;
+        DWORD ActualType;
+        DWORD RegValueSize = sizeof(RegValue);
+        if ((RegQueryValueExA(SubKey, "CP 4000", nullptr, &ActualType,
+                              (PBYTE)&RegValue,
+                              &RegValueSize) == ERROR_SUCCESS) &&
+            (ActualType == REG_QWORD) && RegValueSize == sizeof(RegValue)) {
+          // Assume that the part with the "lowest" reg key name is the primary
+          // part. Win32 makes no guarantees about the order of sub keys, so
+          // compare names numerically: by length, then over the valid chars.
+          if (Values.empty() || SubKeySize < PrimaryPartKeyNameSize ||
+              (SubKeySize == PrimaryPartKeyNameSize &&
+               ::memcmp(SubKeyName, PrimaryPartKeyName, SubKeySize) < 0)) {
+            PrimaryCpuInfo = RegValue;
+            PrimaryPartKeyNameSize = SubKeySize;
+            ::memcpy(PrimaryPartKeyName, SubKeyName, SubKeySize + 1);
+          }
+          if (!llvm::is_contained(Values, RegValue))
+            Values.push_back(RegValue);
+        }
+        RegCloseKey(SubKey);
+      } else {
+        // No more sub keys.
+        break;
+      }
+    }
+    RegCloseKey(CentralProcessorKey);
+  }
+
+  if (Values.empty())
+    return "generic";
+
+  // Win32 makes no guarantees about the order of sub keys, so sort to ensure
+  // reproducibility.
+  llvm::sort(Values);
+  return detail::getHostCPUNameForARM(PrimaryCpuInfo, Values);
+}
+
 #elif defined(__APPLE__) && defined(__powerpc__)
 StringRef sys::getHostCPUName() {
   host_basic_info_data_t hostInfo;
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 0a9ac9bb0596d..be8548ebf8551 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -59,16 +59,28 @@ Serial          : 0000000000000000
 
   EXPECT_EQ(sys::detail::getHostCPUNameForARM(CortexA9ProcCpuinfo),
             "cortex-a9");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+                0x4100c090, ArrayRef<uint64_t>{0x4100c090, 0x4100c090}),
+            "cortex-a9");
   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
                                               "CPU part        : 0xc0f"),
             "cortex-a15");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x4100c0f0,
+                                              ArrayRef<uint64_t>{0x4100c0f0}),
+            "cortex-a15");
   // Verify that both CPU implementer and CPU part are checked:
   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x40\n"
                                               "CPU part        : 0xc0f"),
             "generic");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x4000c0f0,
+                                              ArrayRef<uint64_t>{0x4000c0f0}),
+            "generic");
   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
                                               "CPU part        : 0x06f"),
             "krait");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(0x510006f0,
+                                              ArrayRef<uint64_t>{0x510006f0}),
+            "krait");
 }
 
 TEST(getLinuxHostCPUName, AArch64) {
@@ -126,10 +138,16 @@ TEST(getLinuxHostCPUName, AArch64) {
                                               "CPU part        : 0xd85\n"
                                               "CPU part        : 0xd87"),
             "cortex-x925");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+                0x4100d850, ArrayRef<uint64_t>{0x4100d850, 0x4100d870}),
+            "cortex-x925");
   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x41\n"
                                               "CPU part        : 0xd87\n"
                                               "CPU part        : 0xd85"),
             "cortex-x925");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+                0x4100d870, ArrayRef<uint64_t>{0x4100d870, 0x4100d850}),
+            "cortex-x925");
   EXPECT_EQ(sys::detail::getHostCPUNameForARM("CPU implementer : 0x51\n"
                                               "CPU part        : 0xc00"),
             "falkor");
@@ -200,16 +218,25 @@ CPU architecture: 8
                                               "CPU variant     : 0xc\n"
                                               "CPU part        : 0xafe"),
             "exynos-m3");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+                0x53c0afe0, ArrayRef<uint64_t>{0x53c0afe0, 0x5300d050}),
+            "exynos-m3");
   // Verify Exynos M3.
   EXPECT_EQ(sys::detail::getHostCPUNameForARM(ExynosProcCpuInfo +
                                               "CPU variant     : 0x1\n"
                                               "CPU part        : 0x002"),
             "exynos-m3");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+                0x53100020, ArrayRef<uint64_t>{0x53100020, 0x5300d050}),
+            "exynos-m3");
   // Verify Exynos M4.
   EXPECT_EQ(sys::detail::getHostCPUNameForARM(ExynosProcCpuInfo +
                                               "CPU variant     : 0x1\n"
                                               "CPU part        : 0x003"),
             "exynos-m4");
+  EXPECT_EQ(sys::detail::getHostCPUNameForARM(
+                0x53100030, ArrayRef<uint64_t>{0x53100030, 0x5300d050}),
+            "exynos-m4");
 
   const std::string ThunderX2T99ProcCpuInfo = R"(
 processor	: 0



More information about the llvm-commits mailing list