[libcxx-commits] [libcxx] Fixed get count threads for multi-cpu system with NUMA architecture (#72267) (PR #72270)

Herman Semenov via libcxx-commits libcxx-commits at lists.llvm.org
Tue Nov 14 07:22:59 PST 2023


https://github.com/GermanAizek created https://github.com/llvm/llvm-project/pull/72270

Fixes a very old problem that affects every Windows NT version as well as modern Windows Server 😆

https://developercommunity.visualstudio.com/t/hardware-concurrency-returns-an-incorrect-result/350854

https://stackoverflow.com/questions/31209256/reliable-way-to-programmatically-get-the-number-of-hardware-threads-on-windows

Why this commit is useful beyond server configurations: very cheap PC configurations can now be built from Xeon E54xx, X34xx, E3-xxxx, E5-xxxx, E7-xxx, or any Silver, Gold, or Platinum series CPU. The cheapest option, a two-socket LGA 2011v3 board with NUMA support, is inexpensive on Alibaba, Baidu, AliExpress, and Amazon.

Examples:
https://www.alibaba.com/product-detail/DDR4-x99-dual-cpu-Lga2011-V3_1600443686429.html
https://www.amazon.com/Desktop-Motherboard-Gigabit-LGA2011-SATA3-0/dp/B0CC6KSTXC

>From 68ea9fc369f1f664bd021e4dc22efeb943f1cc0d Mon Sep 17 00:00:00 2001
From: German Semenov <GermanAizek at yandex.ru>
Date: Tue, 14 Nov 2023 18:21:51 +0300
Subject: [PATCH] [libc++] Fixed get count threads for multi-cpu system with
 NUMA architecture  (#72267)

---
 libcxx/src/thread.cpp | 55 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/libcxx/src/thread.cpp b/libcxx/src/thread.cpp
index 289c457cd5a5cbd..f32a2d4ed6d486e 100644
--- a/libcxx/src/thread.cpp
+++ b/libcxx/src/thread.cpp
@@ -81,9 +81,58 @@ thread::hardware_concurrency() noexcept
         return 0;
     return static_cast<unsigned>(result);
 #elif defined(_LIBCPP_WIN32API)
-    SYSTEM_INFO info;
-    GetSystemInfo(&info);
-    return info.dwNumberOfProcessors;
+    // This implementation supports both conventional single-cpu PC configurations
+    // and multi-cpu system on NUMA (Non-uniform_memory_access) architecture
+    DWORD length = 0;
+    unsigned concurrency = 0;
+    const auto validConcurrency = [&concurrency]() noexcept -> unsigned
+    {
+        if (concurrency == 0)
+        {
+            SYSTEM_INFO info;
+            GetSystemInfo(&info);
+            return info.dwNumberOfProcessors;
+        }
+        else
+        {
+            return concurrency;
+        }
+    };
+    if (GetLogicalProcessorInformationEx(RelationAll, nullptr, &length) != FALSE)
+    {
+        return validConcurrency();
+    }
+    if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+    {
+        return validConcurrency();
+    }
+    std::unique_ptr<void, void (*)(void*)> buffer(std::malloc(length), std::free);
+    if (!buffer)
+    {
+        return validConcurrency();
+    }
+    auto* mem = reinterpret_cast<unsigned char*>(buffer.get());
+    if (GetLogicalProcessorInformationEx(
+        RelationAll, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(mem), &length) == false)
+    {
+        return validConcurrency();
+    }
+    DWORD i = 0;
+    while (i < length)
+    {
+        const auto* proc = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(mem + i);
+        if (proc->Relationship == RelationProcessorCore) {
+            for (WORD group = 0; group < proc->Processor.GroupCount; ++group)
+            {
+                for (KAFFINITY mask = proc->Processor.GroupMask[group].Mask; mask != 0; mask >>= 1)
+                {
+                    concurrency += mask & 1;
+                }
+            }
+        }
+        i += proc->Size;
+    }
+    return validConcurrency();
 #else  // defined(CTL_HW) && defined(HW_NCPU)
     // TODO: grovel through /proc or check cpuid on x86 and similar
     // instructions on other architectures.



More information about the libcxx-commits mailing list