[clang] 8cd8bd4 - Implement __cpuid and __cpuidex as Clang builtins

Hans Wennborg via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 18 10:14:11 PDT 2022


Author: Alan Zhao
Date: 2022-03-18T18:13:52+01:00
New Revision: 8cd8bd4a5ca702024b9df53a9c7984bb700488a0

URL: https://github.com/llvm/llvm-project/commit/8cd8bd4a5ca702024b9df53a9c7984bb700488a0
DIFF: https://github.com/llvm/llvm-project/commit/8cd8bd4a5ca702024b9df53a9c7984bb700488a0.diff

LOG: Implement __cpuid and __cpuidex as Clang builtins

https://reviews.llvm.org/D23944 implemented the #pragma intrinsic from
MSVC. This causes the statement #pragma intrinsic(cpuid) to fail [0]
on Clang because cpuid is currently implemented in intrin.h instead
of a Clang builtin. Reimplementing cpuid (as well as it's releated
function, cpuidex) should resolve this.

[0]: https://crbug.com/1279344

Differential revision: https://reviews.llvm.org/D121653

Added: 
    

Modified: 
    clang/include/clang/Basic/BuiltinsX86.def
    clang/lib/CodeGen/CGBuiltin.cpp
    clang/lib/Headers/intrin.h
    clang/test/CodeGen/ms-intrinsics-cpuid.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 51d5db64f333d..0dd7a4daa3021 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2071,6 +2071,9 @@ TARGET_HEADER_BUILTIN(_ReadWriteBarrier, "v", "nh", "intrin.h", ALL_MS_LANGUAGES
 TARGET_HEADER_BUILTIN(_ReadBarrier,      "v", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(_WriteBarrier,     "v", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
 
+TARGET_HEADER_BUILTIN(__cpuid,   "vi*i",  "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(__cpuidex, "vi*ii", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+
 TARGET_HEADER_BUILTIN(__emul,  "LLiii",    "nch", "intrin.h", ALL_MS_LANGUAGES, "")
 TARGET_HEADER_BUILTIN(__emulu, "ULLiUiUi", "nch", "intrin.h", ALL_MS_LANGUAGES, "")
 

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 39e88482db94d..af0b691b24f8f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14922,6 +14922,46 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     return EmitX86Select(*this, Ops[2], Res, Ops[1]);
   }
 
+  case X86::BI__cpuid:
+  case X86::BI__cpuidex: {
+    Value *FuncId = EmitScalarExpr(E->getArg(1));
+    Value *SubFuncId = BuiltinID == X86::BI__cpuidex
+                           ? EmitScalarExpr(E->getArg(2))
+                           : llvm::ConstantInt::get(Int32Ty, 0);
+
+    llvm::StructType *CpuidRetTy =
+        llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty, Int32Ty);
+    llvm::FunctionType *FTy =
+        llvm::FunctionType::get(CpuidRetTy, {Int32Ty, Int32Ty}, false);
+
+    StringRef Asm, Constraints;
+    if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
+      Asm = "cpuid";
+      Constraints = "={ax},={bx},={cx},={dx},{ax},{cx}";
+    } else {
+      // x86-64 uses %rbx as the base register, so preserve it.
+      Asm = "xchgq %rbx, ${1:q}\n"
+            "cpuid\n"
+            "xchgq %rbx, ${1:q}";
+      Constraints = "={ax},=r,={cx},={dx},0,2";
+    }
+
+    llvm::InlineAsm *IA = llvm::InlineAsm::get(FTy, Asm, Constraints,
+                                               /*hasSideEffects=*/false);
+    Value *IACall = Builder.CreateCall(IA, {FuncId, SubFuncId});
+    Value *BasePtr = EmitScalarExpr(E->getArg(0));
+    Value *Store = nullptr;
+    for (unsigned i = 0; i < 4; i++) {
+      Value *Extracted = Builder.CreateExtractValue(IACall, i);
+      Value *StorePtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, BasePtr, i);
+      Store = Builder.CreateAlignedStore(Extracted, StorePtr, getIntAlign());
+    }
+
+    // Return the last store instruction to signal that we have emitted the
+    // the intrinsic.
+    return Store;
+  }
+
   case X86::BI__emul:
   case X86::BI__emulu: {
     llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);

diff  --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h
index 02e66d02067c3..741b21eef32a6 100644
--- a/clang/lib/Headers/intrin.h
+++ b/clang/lib/Headers/intrin.h
@@ -534,27 +534,6 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst,
 |* Misc
 \*----------------------------------------------------------------------------*/
 #if defined(__i386__) || defined(__x86_64__)
-#if defined(__i386__)
-#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx)             \
-  __asm("cpuid"                                                                \
-        : "=a"(__eax), "=b"(__ebx), "=c"(__ecx), "=d"(__edx)                   \
-        : "0"(__leaf), "2"(__count))
-#else
-/* x86-64 uses %rbx as the base register, so preserve it. */
-#define __cpuid_count(__leaf, __count, __eax, __ebx, __ecx, __edx)             \
-  __asm("xchg{q} {%%rbx, %q1|%q1, rbx}\n"                                      \
-        "cpuid\n"                                                              \
-        "xchg{q} {%%rbx, %q1|%q1, rbx}"                                        \
-        : "=a"(__eax), "=r"(__ebx), "=c"(__ecx), "=d"(__edx)                   \
-        : "0"(__leaf), "2"(__count))
-#endif
-static __inline__ void __DEFAULT_FN_ATTRS __cpuid(int __info[4], int __level) {
-  __cpuid_count(__level, 0, __info[0], __info[1], __info[2], __info[3]);
-}
-static __inline__ void __DEFAULT_FN_ATTRS __cpuidex(int __info[4], int __level,
-                                                    int __ecx) {
-  __cpuid_count(__level, __ecx, __info[0], __info[1], __info[2], __info[3]);
-}
 static __inline__ void __DEFAULT_FN_ATTRS __halt(void) {
   __asm__ volatile("hlt");
 }

diff  --git a/clang/test/CodeGen/ms-intrinsics-cpuid.c b/clang/test/CodeGen/ms-intrinsics-cpuid.c
index 7cb401cd37034..2d0369e3fee60 100644
--- a/clang/test/CodeGen/ms-intrinsics-cpuid.c
+++ b/clang/test/CodeGen/ms-intrinsics-cpuid.c
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:         -triple i686-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=X86
+// RUN:         -Werror -triple i686-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=X86
 // RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
-// RUN:         -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=X64
+// RUN:         -Werror -triple x86_64-windows-msvc -emit-llvm %s -o - | FileCheck %s --check-prefix=X64
 
 // intrin.h needs size_t, but -ffreestanding prevents us from getting it from
 // stddef.h.  Work around it with this typedef.
@@ -9,15 +9,72 @@ typedef __SIZE_TYPE__ size_t;
 
 #include <intrin.h>
 
-void test__cpuid(int *info, int level) {
-  __cpuid(info, level);
+#pragma intrinsic(__cpuid)
+
+void test__cpuid(int cpuInfo[4], int function_id) {
+  __cpuid(cpuInfo, function_id);
 }
 // X86-LABEL: define {{.*}} @test__cpuid(i32* noundef %{{.*}}, i32 noundef %{{.*}})
-// X86: call { i32, i32, i32, i32 } asm "cpuid",
-// X86-SAME:   "={ax},={bx},={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"
-// X86-SAME:   (i32 %{{.*}}, i32 0)
+// X86-DAG: [[ASMRESULTS:%[0-9]+]] = call { i32, i32, i32, i32 } asm "cpuid", "={ax},={bx},={cx},={dx},{ax},{cx}"
+// X86-DAG: [[ADDRPTR0:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 0
+// X86-DAG: [[ADDRPTR1:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 1
+// X86-DAG: [[ADDRPTR2:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 2
+// X86-DAG: [[ADDRPTR3:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 3
+// X86-DAG: [[RESULT0:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 0
+// X86-DAG: [[RESULT1:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 1
+// X86-DAG: [[RESULT2:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 2
+// X86-DAG: [[RESULT3:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 3
+// X86-DAG: store i32 [[RESULT0]], i32* [[ADDRPTR0]], align 4
+// X86-DAG: store i32 [[RESULT1]], i32* [[ADDRPTR1]], align 4
+// X86-DAG: store i32 [[RESULT2]], i32* [[ADDRPTR2]], align 4
+// X86-DAG: store i32 [[RESULT3]], i32* [[ADDRPTR3]], align 4
 
 // X64-LABEL: define {{.*}} @test__cpuid(i32* noundef %{{.*}}, i32 noundef %{{.*}})
-// X64: call { i32, i32, i32, i32 } asm "xchg$(q$) $(%rbx{{.*}}$){{.*}}cpuid{{.*}}xchg$(q$) $(%rbx{{.*}}$)",
-// X64-SAME:   "={ax},=r,={cx},={dx},0,2,~{dirflag},~{fpsr},~{flags}"
-// X64-SAME:   (i32 %{{.*}}, i32 0)
+// X64-DAG: [[ASMRESULTS:%[0-9]+]] = call { i32, i32, i32, i32 } asm "xchgq %rbx, ${1:q}\0Acpuid\0Axchgq %rbx, ${1:q}", "={ax},=r,={cx},={dx},0,2"
+// X64-DAG: [[ADDRPTR0:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 0
+// X64-DAG: [[ADDRPTR1:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 1
+// X64-DAG: [[ADDRPTR2:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 2
+// X64-DAG: [[ADDRPTR3:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 3
+// X64-DAG: [[RESULT0:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 0
+// X64-DAG: [[RESULT1:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 1
+// X64-DAG: [[RESULT2:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 2
+// X64-DAG: [[RESULT3:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 3
+// X64-DAG: store i32 [[RESULT0]], i32* [[ADDRPTR0]], align 4
+// X64-DAG: store i32 [[RESULT1]], i32* [[ADDRPTR1]], align 4
+// X64-DAG: store i32 [[RESULT2]], i32* [[ADDRPTR2]], align 4
+// X64-DAG: store i32 [[RESULT3]], i32* [[ADDRPTR3]], align 4
+
+#pragma intrinsic(__cpuidex)
+
+void test__cpuidex(int cpuInfo[4], int function_id, int subfunction_id) {
+  __cpuidex(cpuInfo, function_id, subfunction_id);
+}
+// X86-LABEL: define {{.*}} @test__cpuidex(i32* noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}})
+// X86-DAG: [[ASMRESULTS:%[0-9]+]] = call { i32, i32, i32, i32 } asm "cpuid", "={ax},={bx},={cx},={dx},{ax},{cx}"
+// X86-DAG: [[ADDRPTR0:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 0
+// X86-DAG: [[ADDRPTR1:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 1
+// X86-DAG: [[ADDRPTR2:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 2
+// X86-DAG: [[ADDRPTR3:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 3
+// X86-DAG: [[RESULT0:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 0
+// X86-DAG: [[RESULT1:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 1
+// X86-DAG: [[RESULT2:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 2
+// X86-DAG: [[RESULT3:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 3
+// X86-DAG: store i32 [[RESULT0]], i32* [[ADDRPTR0]], align 4
+// X86-DAG: store i32 [[RESULT1]], i32* [[ADDRPTR1]], align 4
+// X86-DAG: store i32 [[RESULT2]], i32* [[ADDRPTR2]], align 4
+// X86-DAG: store i32 [[RESULT3]], i32* [[ADDRPTR3]], align 4
+
+// X64-LABEL: define {{.*}} @test__cpuidex(i32* noundef %{{.*}}, i32 noundef %{{.*}}, i32 noundef %{{.*}})
+// X64-DAG: [[ASMRESULTS:%[0-9]+]] = call { i32, i32, i32, i32 } asm "xchgq %rbx, ${1:q}\0Acpuid\0Axchgq %rbx, ${1:q}", "={ax},=r,={cx},={dx},0,2"
+// X64-DAG: [[ADDRPTR0:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 0
+// X64-DAG: [[ADDRPTR1:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 1
+// X64-DAG: [[ADDRPTR2:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 2
+// X64-DAG: [[ADDRPTR3:%[0-9]+]] = getelementptr inbounds i32, i32* %{{.*}}, i32 3
+// X64-DAG: [[RESULT0:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 0
+// X64-DAG: [[RESULT1:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 1
+// X64-DAG: [[RESULT2:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 2
+// X64-DAG: [[RESULT3:%[0-9]+]] = extractvalue { i32, i32, i32, i32 } [[ASMRESULTS]], 3
+// X64-DAG: store i32 [[RESULT0]], i32* [[ADDRPTR0]], align 4
+// X64-DAG: store i32 [[RESULT1]], i32* [[ADDRPTR1]], align 4
+// X64-DAG: store i32 [[RESULT2]], i32* [[ADDRPTR2]], align 4
+// X64-DAG: store i32 [[RESULT3]], i32* [[ADDRPTR3]], align 4


        


More information about the cfe-commits mailing list