[clang] [clang][x86] Add constexpr support for MULX intrinsics (PR #110654)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Oct 1 06:25:28 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
With this patch all BMI2 intrinsics can now be used in constant expressions
---
Full diff: https://github.com/llvm/llvm-project/pull/110654.diff
3 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (+1-1)
- (modified) clang/lib/Headers/bmi2intrin.h (+18-27)
- (modified) clang/test/CodeGen/X86/bmi2-builtins.c (+24)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a55a3b8687e46e..eb020895d856ad 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -522,7 +522,7 @@ X86 Support
- All intrinsics in bmiintrin.h can now be used in constant expressions.
-- All bzhi/pdep/pext intrinsics in bmi2intrin.h can now be used in constant expressions.
+- All intrinsics in bmi2intrin.h can now be used in constant expressions.
- All intrinsics in tbmintrin.h can now be used in constant expressions.
diff --git a/clang/lib/Headers/bmi2intrin.h b/clang/lib/Headers/bmi2intrin.h
index 7b2c2f145b14a0..bdb61b13fb83b6 100644
--- a/clang/lib/Headers/bmi2intrin.h
+++ b/clang/lib/Headers/bmi2intrin.h
@@ -15,12 +15,12 @@
#define __BMI2INTRIN_H
/* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
-
#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) constexpr
#else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
#endif
/// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
@@ -43,9 +43,8 @@
/// \param __Y
/// The lower 8 bits specify the bit number of the lowest bit to zero.
/// \returns The partially zeroed 32-bit value.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_bzhi_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_bzhi_u32(unsigned int __X, unsigned int __Y) {
return __builtin_ia32_bzhi_si(__X, __Y);
}
@@ -73,9 +72,8 @@ _bzhi_u32(unsigned int __X, unsigned int __Y)
/// \param __Y
/// The 32-bit mask specifying where to deposit source bits.
/// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_pdep_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_pdep_u32(unsigned int __X, unsigned int __Y) {
return __builtin_ia32_pdep_si(__X, __Y);
}
@@ -103,9 +101,8 @@ _pdep_u32(unsigned int __X, unsigned int __Y)
/// \param __Y
/// The 32-bit mask specifying which source bits to extract.
/// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_pext_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_pext_u32(unsigned int __X, unsigned int __Y) {
return __builtin_ia32_pext_si(__X, __Y);
}
@@ -130,8 +127,7 @@ _pext_u32(unsigned int __X, unsigned int __Y)
/// A pointer to memory for storing the upper half of the product.
/// \returns The lower half of the product.
static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
-{
+_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
unsigned long long __res = (unsigned long long) __X * __Y;
*__P = (unsigned int)(__res >> 32);
return (unsigned int)__res;
@@ -159,9 +155,8 @@ _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
/// \param __Y
/// The lower 8 bits specify the bit number of the lowest bit to zero.
/// \returns The partially zeroed 64-bit value.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_bzhi_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_bzhi_u64(unsigned long long __X, unsigned long long __Y) {
return __builtin_ia32_bzhi_di(__X, __Y);
}
@@ -189,9 +184,8 @@ _bzhi_u64(unsigned long long __X, unsigned long long __Y)
/// \param __Y
/// The 64-bit mask specifying where to deposit source bits.
/// \returns The 64-bit result.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_pdep_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_pdep_u64(unsigned long long __X, unsigned long long __Y) {
return __builtin_ia32_pdep_di(__X, __Y);
}
@@ -219,9 +213,8 @@ _pdep_u64(unsigned long long __X, unsigned long long __Y)
/// \param __Y
/// The 64-bit mask specifying which source bits to extract.
/// \returns The 64-bit result.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_pext_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_pext_u64(unsigned long long __X, unsigned long long __Y) {
return __builtin_ia32_pext_di(__X, __Y);
}
@@ -247,8 +240,7 @@ _pext_u64(unsigned long long __X, unsigned long long __Y)
/// \returns The lower half of the product.
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_mulx_u64 (unsigned long long __X, unsigned long long __Y,
- unsigned long long *__P)
-{
+ unsigned long long *__P) {
unsigned __int128 __res = (unsigned __int128) __X * __Y;
*__P = (unsigned long long) (__res >> 64);
return (unsigned long long) __res;
@@ -257,6 +249,5 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
#endif /* __x86_64__ */
#undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif /* __BMI2INTRIN_H */
diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c
index e00bac4b51e903..48424f553768be 100644
--- a/clang/test/CodeGen/X86/bmi2-builtins.c
+++ b/clang/test/CodeGen/X86/bmi2-builtins.c
@@ -71,6 +71,18 @@ char pext32_1[_pext_u32(0x89ABCDEF, 0x000000F0) == 0x0000000E ? 1 : -1];
char pext32_2[_pext_u32(0x89ABCDEF, 0xF00000F0) == 0x0000008E ? 1 : -1];
char pext32_3[_pext_u32(0x89ABCDEF, 0xFFFFFFFF) == 0x89ABCDEF ? 1 : -1];
+constexpr unsigned long long
+test_mulx_u32(unsigned int X, unsigned int Y)
+{
+ unsigned int H{};
+ return _mulx_u32(X, Y, &H) | ((unsigned long long) H << 32);
+}
+
+void mulxu32() {
+ constexpr unsigned X = 0x89ABCDEF, Y = 0x01234567;
+ static_assert(test_mulx_u32(X,Y) == ((unsigned long long)X * Y));
+}
+
#ifdef __x86_64__
char bzhi64_0[_bzhi_u64(0x0123456789ABCDEFULL, 0) == 0x0000000000000000ULL ? 1 : -1];
char bzhi64_1[_bzhi_u64(0x0123456789ABCDEFULL, 32) == 0x0000000089ABCDEFULL ? 1 : -1];
@@ -86,5 +98,17 @@ char pext64_0[_pext_u64(0x0123456789ABCDEFULL, 0x0000000000000000ULL) == 0x00000
char pext64_1[_pext_u64(0x0123456789ABCDEFULL, 0x00000000000000F0ULL) == 0x000000000000000EULL ? 1 : -1];
char pext64_2[_pext_u64(0x0123456789ABCDEFULL, 0xF00000F0F00000F0ULL) == 0x000000000000068EULL ? 1 : -1];
char pext64_3[_pext_u64(0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL) == 0x0123456789ABCDEFULL ? 1 : -1];
+
+constexpr unsigned __int128
+test_mulx_u64(unsigned long long X, unsigned long long Y)
+{
+ unsigned long long H{};
+ return _mulx_u64(X, Y, &H) | ((unsigned __int128) H << 64);
+}
+
+void mulxu64() {
+ constexpr unsigned long long X = 0x0123456789ABCDEFULL, Y = 0xFEDCBA9876543210ULL;
+ static_assert(test_mulx_u64(X,Y) == ((unsigned __int128)X * Y));
+}
#endif
#endif
\ No newline at end of file
``````````
</details>
https://github.com/llvm/llvm-project/pull/110654
More information about the cfe-commits
mailing list