[compiler-rt] [llvm] [compiler-rt][X86] Use functions in cpuid.h instead of inline assembly (PR #97877)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 5 23:36:28 PDT 2024
https://github.com/boomanaiden154 created https://github.com/llvm/llvm-project/pull/97877
This patch makes the host/feature detection in compiler-rt and LLVM use the functions provided in cpuid.h(__get_cpuid, __get_cpuid_count) instead of inline assembly. This simplifies the implementation and moves any inline assembly away to a more common place.
A while ago, some similar cleanup was attempted, but this ended up resulting in some compilation errors due to toolchain minimum version issues (https://bugs.llvm.org/show_bug.cgi?id=30384). After the reversion landed, there have been no attempts since then to clean up the code, even though the minimum supported compilers now support the relevant functions (https://godbolt.org/z/o1Mjz8ndv).
>From 5587b6487137986759ba6b42cca6ea4db25126ec Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Sat, 6 Jul 2024 06:26:54 +0000
Subject: [PATCH] [compiler-rt][X86] Use functions in cpuid.h instead of inline
assembly
This patch makes the host/feature detection in compiler-rt and LLVM use
the functions provided in cpuid.h(__get_cpuid, __get_cpuid_count)
instead of inline assembly. This simplifies the implementation and moves
any inline assembly away to a more common place.
A while ago, some similar cleanup was attempted, but this ended up
resulting in some compilation errors due to toolchain minimum version
issues (https://bugs.llvm.org/show_bug.cgi?id=30384). After the
reversion landed, there have been no attempts since then to clean up the
code, even though the minimum supported compilers now support the
relevant functions (https://godbolt.org/z/o1Mjz8ndv).
---
compiler-rt/lib/builtins/cpu_model/x86.c | 86 ++++------------------
llvm/lib/TargetParser/Host.cpp | 90 ++++--------------------
2 files changed, 27 insertions(+), 149 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 7e8acb3e73eda..254dff175b997 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -23,6 +23,10 @@
#include <assert.h>
+#if defined(__GNUC__) || defined(__clang__)
+#include <cpuid.h>
+#endif
+
#ifdef _MSC_VER
#include <intrin.h>
#endif
@@ -224,38 +228,6 @@ enum ProcessorFeatures {
CPU_FEATURE_MAX
};
-// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
-// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
-// support. Consequently, for i386, the presence of CPUID is checked first
-// via the corresponding eflags bit.
-static bool isCpuIdSupported(void) {
-#if defined(__GNUC__) || defined(__clang__)
-#if defined(__i386__)
- int __cpuid_supported;
- __asm__(" pushfl\n"
- " popl %%eax\n"
- " movl %%eax,%%ecx\n"
- " xorl $0x00200000,%%eax\n"
- " pushl %%eax\n"
- " popfl\n"
- " pushfl\n"
- " popl %%eax\n"
- " movl $0,%0\n"
- " cmpl %%eax,%%ecx\n"
- " je 1f\n"
- " movl $1,%0\n"
- "1:"
- : "=r"(__cpuid_supported)
- :
- : "eax", "ecx");
- if (!__cpuid_supported)
- return false;
-#endif
- return true;
-#endif
- return true;
-}
-
// This code is copied from lib/Support/Host.cpp.
// Changes to either file should be mirrored in the other.
@@ -264,25 +236,7 @@ static bool isCpuIdSupported(void) {
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
-#if defined(__x86_64__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- // FIXME: should we save this for Clang?
- __asm__("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value));
- return false;
-#elif defined(__i386__)
- __asm__("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value));
- return false;
-#else
- return true;
-#endif
+ return !__get_cpuid(value, rEAX, rEBX, rECX, rEDX);
#elif defined(_MSC_VER)
// The MSVC intrinsic is portable across x86 and x64.
int registers[4];
@@ -303,26 +257,12 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
+ // TODO(boomanaiden154): When the minimum toolchain versions for gcc and clang
+ // are such that __cpuidex is defined within cpuid.h for both, we can remove
+ // the __get_cpuid_count function and share the MSVC implementation between
+ // all three.
#if defined(__GNUC__) || defined(__clang__)
-#if defined(__x86_64__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- // FIXME: should we save this for Clang?
- __asm__("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value), "c"(subleaf));
- return false;
-#elif defined(__i386__)
- __asm__("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value), "c"(subleaf));
- return false;
-#else
- return true;
-#endif
+ return !__get_cpuid_count(value, subleaf, rEAX, rEBX, rECX, rEDX);
#elif defined(_MSC_VER)
int registers[4];
__cpuidex(registers, value, subleaf);
@@ -338,6 +278,9 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
// Read control register 0 (XCR0). Used to detect features such as AVX.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
+ // TODO(boomanaiden154): When the minimum toolchain versions for gcc and clang
+ // are such that _xgetbv is supported by both, we can unify the implementation
+ // with MSVC and remove all inline assembly.
#if defined(__GNUC__) || defined(__clang__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
// directly because older assemblers do not include support for xgetbv and
@@ -1108,8 +1051,7 @@ int CONSTRUCTOR_ATTRIBUTE __cpu_indicator_init(void) {
if (__cpu_model.__cpu_vendor)
return 0;
- if (!isCpuIdSupported() ||
- getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
+ if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX) || MaxLeaf < 1) {
__cpu_model.__cpu_vendor = VENDOR_OTHER;
return -1;
}
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 2ea56746aff24..1b4a7327eb8e5 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -50,6 +50,9 @@
#if defined(__sun__) && defined(__svr4__)
#include <kstat.h>
#endif
+#if defined(__GNUC__) || defined(__clang__)
+#include <cpuid.h>
+#endif
#define DEBUG_TYPE "host-detection"
@@ -522,67 +525,14 @@ StringRef sys::detail::getHostCPUNameForBPF() {
}
#if defined(__i386__) || defined(_M_IX86) || \
- defined(__x86_64__) || defined(_M_X64)
-
-// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
-// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
-// support. Consequently, for i386, the presence of CPUID is checked first
-// via the corresponding eflags bit.
-// Removal of cpuid.h header motivated by PR30384
-// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
-// or test-suite, but are used in external projects e.g. libstdcxx
-static bool isCpuIdSupported() {
-#if defined(__GNUC__) || defined(__clang__)
-#if defined(__i386__)
- int __cpuid_supported;
- __asm__(" pushfl\n"
- " popl %%eax\n"
- " movl %%eax,%%ecx\n"
- " xorl $0x00200000,%%eax\n"
- " pushl %%eax\n"
- " popfl\n"
- " pushfl\n"
- " popl %%eax\n"
- " movl $0,%0\n"
- " cmpl %%eax,%%ecx\n"
- " je 1f\n"
- " movl $1,%0\n"
- "1:"
- : "=r"(__cpuid_supported)
- :
- : "eax", "ecx");
- if (!__cpuid_supported)
- return false;
-#endif
- return true;
-#endif
- return true;
-}
+ defined(__x86_64__) || defined(_M_X64)
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
#if defined(__GNUC__) || defined(__clang__)
-#if defined(__x86_64__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- // FIXME: should we save this for Clang?
- __asm__("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value));
- return false;
-#elif defined(__i386__)
- __asm__("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value));
- return false;
-#else
- return true;
-#endif
+ return !__get_cpuid(value, rEAX, rEBX, rECX, rEDX);
#elif defined(_MSC_VER)
// The MSVC intrinsic is portable across x86 and x64.
int registers[4];
@@ -609,9 +559,6 @@ VendorSignatures getVendorSignature(unsigned *MaxLeaf) {
else
*MaxLeaf = 0;
- if (!isCpuIdSupported())
- return VendorSignatures::UNKNOWN;
-
if (getX86CpuIDAndInfo(0, MaxLeaf, &EBX, &ECX, &EDX) || *MaxLeaf < 1)
return VendorSignatures::UNKNOWN;
@@ -639,26 +586,12 @@ using namespace llvm::sys::detail::x86;
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
+ // TODO(boomanaiden154): When the minimum toolchain versions for gcc and clang
+ // are such that __cpuidex is defined within cpuid.h for both, we can remove
+ // the __get_cpuid_count function and share the MSVC implementation between
+ // all three.
#if defined(__GNUC__) || defined(__clang__)
-#if defined(__x86_64__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- // FIXME: should we save this for Clang?
- __asm__("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value), "c"(subleaf));
- return false;
-#elif defined(__i386__)
- __asm__("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value), "c"(subleaf));
- return false;
-#else
- return true;
-#endif
+ return !__get_cpuid_count(value, subleaf, rEAX, rEBX, rECX, rEDX);
#elif defined(_MSC_VER)
int registers[4];
__cpuidex(registers, value, subleaf);
@@ -674,6 +607,9 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
// Read control register 0 (XCR0). Used to detect features such as AVX.
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
+ // TODO(boomanaiden154): When the minimum toolchain versions for gcc and clang
+ // are such that _xgetbv is supported by both, we can unify the implementation
+ // with MSVC and remove all inline assembly.
#if defined(__GNUC__) || defined(__clang__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
// directly because older assemblers do not include support for xgetbv and
More information about the llvm-commits
mailing list