[clang] [clang][x86] Add constexpr support for MULX intrinsics (PR #110654)

Tue Oct 1 06:25:28 PDT 2024

llvmbot wrote:



@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

With this patch all BMI2 intrinsics can now be used in constant expressions

---
Full diff: https://github.com/llvm/llvm-project/pull/110654.diff


3 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+1-1) 
- (modified) clang/lib/Headers/bmi2intrin.h (+18-27) 
- (modified) clang/test/CodeGen/X86/bmi2-builtins.c (+24) 


``````````diff

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a55a3b8687e46e..eb020895d856ad 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -522,7 +522,7 @@ X86 Support
 
 - All intrinsics in bmiintrin.h can now be used in constant expressions.
 
-- All bzhi/pdep/pext intrinsics in bmi2intrin.h can now be used in constant expressions.
+- All intrinsics in bmi2intrin.h can now be used in constant expressions.
 
 - All intrinsics in tbmintrin.h can now be used in constant expressions.
 
diff --git a/clang/lib/Headers/bmi2intrin.h b/clang/lib/Headers/bmi2intrin.h
index 7b2c2f145b14a0..bdb61b13fb83b6 100644
--- a/clang/lib/Headers/bmi2intrin.h
+++ b/clang/lib/Headers/bmi2intrin.h
@@ -15,12 +15,12 @@
 #define __BMI2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
-
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) constexpr
 #else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
 #endif
 
 /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
@@ -43,9 +43,8 @@
 /// \param __Y
 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
 /// \returns The partially zeroed 32-bit value.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_bzhi_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_bzhi_u32(unsigned int __X, unsigned int __Y) {
   return __builtin_ia32_bzhi_si(__X, __Y);
 }
 
@@ -73,9 +72,8 @@ _bzhi_u32(unsigned int __X, unsigned int __Y)
 /// \param __Y
 ///    The 32-bit mask specifying where to deposit source bits.
 /// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_pdep_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_pdep_u32(unsigned int __X, unsigned int __Y) {
   return __builtin_ia32_pdep_si(__X, __Y);
 }
 
@@ -103,9 +101,8 @@ _pdep_u32(unsigned int __X, unsigned int __Y)
 /// \param __Y
 ///    The 32-bit mask specifying which source bits to extract.
 /// \returns The 32-bit result.
-static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR
-_pext_u32(unsigned int __X, unsigned int __Y)
-{
+static __inline__ unsigned int __DEFAULT_FN_ATTRS
+_pext_u32(unsigned int __X, unsigned int __Y) {
   return __builtin_ia32_pext_si(__X, __Y);
 }
 
@@ -130,8 +127,7 @@ _pext_u32(unsigned int __X, unsigned int __Y)
 ///    A pointer to memory for storing the upper half of the product.
 /// \returns The lower half of the product.
 static __inline__ unsigned int __DEFAULT_FN_ATTRS
-_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
-{
+_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
   unsigned long long __res = (unsigned long long) __X * __Y;
   *__P = (unsigned int)(__res >> 32);
   return (unsigned int)__res;
@@ -159,9 +155,8 @@ _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P)
 /// \param __Y
 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
 /// \returns The partially zeroed 64-bit value.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_bzhi_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_bzhi_u64(unsigned long long __X, unsigned long long __Y) {
   return __builtin_ia32_bzhi_di(__X, __Y);
 }
 
@@ -189,9 +184,8 @@ _bzhi_u64(unsigned long long __X, unsigned long long __Y)
 /// \param __Y
 ///    The 64-bit mask specifying where to deposit source bits.
 /// \returns The 64-bit result.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_pdep_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_pdep_u64(unsigned long long __X, unsigned long long __Y) {
   return __builtin_ia32_pdep_di(__X, __Y);
 }
 
@@ -219,9 +213,8 @@ _pdep_u64(unsigned long long __X, unsigned long long __Y)
 /// \param __Y
 ///    The 64-bit mask specifying which source bits to extract.
 /// \returns The 64-bit result.
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR
-_pext_u64(unsigned long long __X, unsigned long long __Y)
-{
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+_pext_u64(unsigned long long __X, unsigned long long __Y) {
   return __builtin_ia32_pext_di(__X, __Y);
 }
 
@@ -247,8 +240,7 @@ _pext_u64(unsigned long long __X, unsigned long long __Y)
 /// \returns The lower half of the product.
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _mulx_u64 (unsigned long long __X, unsigned long long __Y,
-	   unsigned long long *__P)
-{
+           unsigned long long *__P) {
   unsigned __int128 __res = (unsigned __int128) __X * __Y;
   *__P = (unsigned long long) (__res >> 64);
   return (unsigned long long) __res;
@@ -257,6 +249,5 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
 #endif /* __x86_64__  */
 
 #undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
 
 #endif /* __BMI2INTRIN_H */
diff --git a/clang/test/CodeGen/X86/bmi2-builtins.c b/clang/test/CodeGen/X86/bmi2-builtins.c
index e00bac4b51e903..48424f553768be 100644
--- a/clang/test/CodeGen/X86/bmi2-builtins.c
+++ b/clang/test/CodeGen/X86/bmi2-builtins.c
@@ -71,6 +71,18 @@ char pext32_1[_pext_u32(0x89ABCDEF,  0x000000F0) == 0x0000000E ? 1 : -1];
 char pext32_2[_pext_u32(0x89ABCDEF,  0xF00000F0) == 0x0000008E ? 1 : -1];
 char pext32_3[_pext_u32(0x89ABCDEF,  0xFFFFFFFF) == 0x89ABCDEF ? 1 : -1];
 
+constexpr unsigned long long
+test_mulx_u32(unsigned int X, unsigned int Y)
+{
+  unsigned int H{};
+  return _mulx_u32(X, Y, &H) | ((unsigned long long) H << 32);
+}
+
+void mulxu32() {
+  constexpr unsigned X = 0x89ABCDEF, Y = 0x01234567;
+  static_assert(test_mulx_u32(X,Y) == ((unsigned long long)X * Y));
+}
+
 #ifdef __x86_64__
 char bzhi64_0[_bzhi_u64(0x0123456789ABCDEFULL,   0) == 0x0000000000000000ULL ? 1 : -1];
 char bzhi64_1[_bzhi_u64(0x0123456789ABCDEFULL,  32) == 0x0000000089ABCDEFULL ? 1 : -1];
@@ -86,5 +98,17 @@ char pext64_0[_pext_u64(0x0123456789ABCDEFULL, 0x0000000000000000ULL) == 0x00000
 char pext64_1[_pext_u64(0x0123456789ABCDEFULL, 0x00000000000000F0ULL) == 0x000000000000000EULL ? 1 : -1];
 char pext64_2[_pext_u64(0x0123456789ABCDEFULL, 0xF00000F0F00000F0ULL) == 0x000000000000068EULL ? 1 : -1];
 char pext64_3[_pext_u64(0x0123456789ABCDEFULL, 0xFFFFFFFFFFFFFFFFULL) == 0x0123456789ABCDEFULL ? 1 : -1];
+
+constexpr unsigned __int128
+test_mulx_u64(unsigned long long X, unsigned long long Y)
+{
+  unsigned long long H{};
+  return _mulx_u64(X, Y, &H) | ((unsigned __int128) H << 64);
+}
+
+void mulxu64() {
+  constexpr unsigned long long X = 0x0123456789ABCDEFULL, Y = 0xFEDCBA9876543210ULL;
+  static_assert(test_mulx_u64(X,Y) == ((unsigned __int128)X * Y));
+}
 #endif
 #endif
\ No newline at end of file

``````````

</details>


https://github.com/llvm/llvm-project/pull/110654