[clang] [clang][x86] Add constexpr support for LZCNT/TZCNT intrinsics (PR #110499)

Simon Pilgrim via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 30 06:19:19 PDT 2024


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/110499

>From b61842abda168ea02bb6b25598ce6d08757bb4f7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 30 Sep 2024 13:21:42 +0100
Subject: [PATCH 1/2] [clang][x86] Add constexpr support for LZCNT/TZCNT
 intrinsics

---
 clang/docs/ReleaseNotes.rst                  |  6 ++++
 clang/include/clang/Basic/BuiltinsX86.def    |  8 ++---
 clang/include/clang/Basic/BuiltinsX86_64.def |  4 +--
 clang/lib/AST/ExprConstant.cpp               | 18 +++++++++++
 clang/lib/Headers/bmiintrin.h                |  4 +++
 clang/lib/Headers/lzcntintrin.h              |  4 +++
 clang/test/CodeGen/X86/bmi-builtins.c        | 32 ++++++++++++++++++++
 clang/test/CodeGen/X86/lzcnt-builtins.c      | 27 ++++++++++++++++-
 8 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 35c31452cef411..d48601db023553 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -511,6 +511,12 @@ X86 Support
   * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
   ``*_(mask(z)))_minmax_s[s|d|h]``.
 
+- The following bit manipulation intrinsics can now be used in constant expressions:
+  all lzcnt intrinsics in lzcntintrin.h,
+  all bextr intrinsics in bmiintrin.h,
+  all tzcnt intrinsics in bmiintrin.h,
+  and all bextr intrinsics in tbmintrin.h.
+
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e4eb9bfbdd1735..e68dcd922acbff 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -551,13 +551,13 @@ TARGET_BUILTIN(__builtin_ia32_rdseed16_step, "UiUs*", "n", "rdseed")
 TARGET_BUILTIN(__builtin_ia32_rdseed32_step, "UiUi*", "n", "rdseed")
 
 // LZCNT
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "nc", "lzcnt")
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "nc", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u16, "UsUs", "ncE", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u32, "UiUi", "ncE", "lzcnt")
 
 // BMI
 TARGET_BUILTIN(__builtin_ia32_bextr_u32, "UiUiUi", "ncE", "bmi")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "nc", "")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "nc", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u16, "UsUs", "ncE", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u32, "UiUi", "ncE", "")
 
 // BMI2
 TARGET_BUILTIN(__builtin_ia32_bzhi_si, "UiUiUi", "nc", "bmi2")
diff --git a/clang/include/clang/Basic/BuiltinsX86_64.def b/clang/include/clang/Basic/BuiltinsX86_64.def
index 81fd46ee6d1663..5f4252c91b8847 100644
--- a/clang/include/clang/Basic/BuiltinsX86_64.def
+++ b/clang/include/clang/Basic/BuiltinsX86_64.def
@@ -70,9 +70,9 @@ TARGET_BUILTIN(__builtin_ia32_addcarryx_u64, "UcUcUOiUOiUOi*", "n", "")
 TARGET_BUILTIN(__builtin_ia32_subborrow_u64, "UcUcUOiUOiUOi*", "n", "")
 TARGET_BUILTIN(__builtin_ia32_rdrand64_step, "UiUOi*", "n", "rdrnd")
 TARGET_BUILTIN(__builtin_ia32_rdseed64_step, "UiUOi*", "n", "rdseed")
-TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "nc", "lzcnt")
+TARGET_BUILTIN(__builtin_ia32_lzcnt_u64, "UOiUOi", "ncE", "lzcnt")
 TARGET_BUILTIN(__builtin_ia32_bextr_u64, "UOiUOiUOi", "ncE", "bmi")
-TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "nc", "")
+TARGET_BUILTIN(__builtin_ia32_tzcnt_u64, "UOiUOi", "ncE", "")
 TARGET_BUILTIN(__builtin_ia32_bzhi_di, "UOiUOiUOi", "nc", "bmi2")
 TARGET_BUILTIN(__builtin_ia32_pdep_di, "UOiUOiUOi", "nc", "bmi2")
 TARGET_BUILTIN(__builtin_ia32_pext_di, "UOiUOiUOi", "nc", "bmi2")
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 51856693944761..834a7a1e2eb239 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13486,6 +13486,24 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     Result &= llvm::maskTrailingOnes<uint64_t>(Length);
     return Success(Result, E);
   }
+
+  case clang::X86::BI__builtin_ia32_lzcnt_u16:
+  case clang::X86::BI__builtin_ia32_lzcnt_u32:
+  case clang::X86::BI__builtin_ia32_lzcnt_u64: {
+    APSInt Val;
+    if (!EvaluateInteger(E->getArg(0), Val, Info))
+      return false;
+    return Success(Val.countLeadingZeros(), E);
+  }
+
+  case clang::X86::BI__builtin_ia32_tzcnt_u16:
+  case clang::X86::BI__builtin_ia32_tzcnt_u32:
+  case clang::X86::BI__builtin_ia32_tzcnt_u64: {
+    APSInt Val;
+    if (!EvaluateInteger(E->getArg(0), Val, Info))
+      return false;
+    return Success(Val.countTrailingZeros(), E);
+  }
   }
 }
 
diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h
index 72c84d12c0e520..b0f44367633ce9 100644
--- a/clang/lib/Headers/bmiintrin.h
+++ b/clang/lib/Headers/bmiintrin.h
@@ -17,7 +17,11 @@
 /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT
    instruction behaves as BSF on non-BMI targets, there is code that expects
    to use it as a potentially faster version of BSF. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) constexpr
+#else
 #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+#endif
 
 /// Counts the number of trailing zero bits in the operand.
 ///
diff --git a/clang/lib/Headers/lzcntintrin.h b/clang/lib/Headers/lzcntintrin.h
index f4ddce9d0e6834..d746d91d9fe5f9 100644
--- a/clang/lib/Headers/lzcntintrin.h
+++ b/clang/lib/Headers/lzcntintrin.h
@@ -15,7 +15,11 @@
 #define __LZCNTINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr
+#else
 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
+#endif
 
 #ifndef _MSC_VER
 /// Counts the number of leading zero bits in the operand.
diff --git a/clang/test/CodeGen/X86/bmi-builtins.c b/clang/test/CodeGen/X86/bmi-builtins.c
index 530d38dcf342c0..6c0b2c440ea081 100644
--- a/clang/test/CodeGen/X86/bmi-builtins.c
+++ b/clang/test/CodeGen/X86/bmi-builtins.c
@@ -249,6 +249,26 @@ char bextr32_6[_bextr_u32(0x00000000, 0x00000000, 0x00000000) == 0x00000000 ? 1
 char bextr32_7[_bextr_u32(0x000003F0, 0xFFFFFF04, 0xFFFFFF10) == 0x0000003F ? 1 : -1];
 char bextr32_8[_bextr_u32(0x000003F0, 0xFFFFFF08, 0xFFFFFF30) == 0x00000003 ? 1 : -1];
 
+char tzcntu16_0[__tzcnt_u16(0x0000) == 16 ? 1 : -1];
+char tzcntu16_1[__tzcnt_u16(0x0001) ==  0 ? 1 : -1];
+char tzcntu16_2[__tzcnt_u16(0x0010) ==  4 ? 1 : -1];
+
+char tzcnt2u16_0[_tzcnt_u16(0x0000) == 16 ? 1 : -1];
+char tzcnt2u16_1[_tzcnt_u16(0x0001) ==  0 ? 1 : -1];
+char tzcnt2u16_2[_tzcnt_u16(0x0010) ==  4 ? 1 : -1];
+
+char tzcntu32_0[__tzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char tzcntu32_1[__tzcnt_u32(0x00000001) ==  0 ? 1 : -1];
+char tzcntu32_2[__tzcnt_u32(0x00000080) ==  7 ? 1 : -1];
+
+char tzcnt2u32_0[_tzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char tzcnt2u32_1[_tzcnt_u32(0x00000001) ==  0 ? 1 : -1];
+char tzcnt2u32_2[_tzcnt_u32(0x00000080) ==  7 ? 1 : -1];
+
+char tzcnt3u32_0[_mm_tzcnt_32(0x00000000) == 32 ? 1 : -1];
+char tzcnt3u32_1[_mm_tzcnt_32(0x00000001) ==  0 ? 1 : -1];
+char tzcnt3u32_2[_mm_tzcnt_32(0x00000080) ==  7 ? 1 : -1];
+
 #ifdef __x86_64__
 char bextr64_0[__bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
 char bextr64_1[__bextr_u64(0xF000000000000001ULL, 0x0000000000004001ULL) == 0x7800000000000000ULL ? 1 : -1];
@@ -261,5 +281,17 @@ char bextr64_5[_bextr2_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFF1001ULL) == 0x00
 char bextr64_6[_bextr_u64(0x0000000000000000ULL, 0x0000000000000000ULL, 0x0000000000000000ULL) == 0x0000000000000000ULL ? 1 : -1];
 char bextr64_7[_bextr_u64(0xF000000000000001ULL, 0x0000000000000001ULL, 0x0000000000000040ULL) == 0x7800000000000000ULL ? 1 : -1];
 char bextr64_8[_bextr_u64(0xF000000000000001ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF10ULL) == 0x0000000000000000ULL ? 1 : -1];
+
+char tzcntu64_0[__tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcntu64_1[__tzcnt_u64(0x0000000000000001ULL) ==  0 ? 1 : -1];
+char tzcntu64_2[__tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];
+
+char tzcnt2u64_0[_tzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcnt2u64_1[_tzcnt_u64(0x0000000000000001ULL) ==  0 ? 1 : -1];
+char tzcnt2u64_2[_tzcnt_u64(0x0000000800000000ULL) == 35 ? 1 : -1];
+
+char tzcnt3u64_0[_mm_tzcnt_64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char tzcnt3u64_1[_mm_tzcnt_64(0x0000000000000001ULL) ==  0 ? 1 : -1];
+char tzcnt3u64_2[_mm_tzcnt_64(0x0000000800000000ULL) == 35 ? 1 : -1];
 #endif
 #endif
\ No newline at end of file
diff --git a/clang/test/CodeGen/X86/lzcnt-builtins.c b/clang/test/CodeGen/X86/lzcnt-builtins.c
index 9255207ffaef4f..18ced89fc79b1c 100644
--- a/clang/test/CodeGen/X86/lzcnt-builtins.c
+++ b/clang/test/CodeGen/X86/lzcnt-builtins.c
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -x c++ -std=c++11 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +lzcnt -emit-llvm -o - | FileCheck %s
 
 
 #include <immintrin.h>
@@ -32,3 +33,27 @@ unsigned long long test__lzcnt_u64(unsigned long long __X)
   // CHECK: @llvm.ctlz.i64(i64 %{{.*}}, i1 false)
   return _lzcnt_u64(__X);
 }
+
+
+// Test constexpr handling.
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+char lzcnt16_0[__lzcnt16(0x0000) == 16 ? 1 : -1];
+char lzcnt16_1[__lzcnt16(0x8000) ==  0 ? 1 : -1];
+char lzcnt16_2[__lzcnt16(0x0010) == 11 ? 1 : -1];
+
+char lzcnt32_0[__lzcnt32(0x00000000) == 32 ? 1 : -1];
+char lzcnt32_1[__lzcnt32(0x80000000) ==  0 ? 1 : -1];
+char lzcnt32_2[__lzcnt32(0x00000010) == 27 ? 1 : -1];
+
+char lzcnt64_0[__lzcnt64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char lzcnt64_1[__lzcnt64(0x8000000000000000ULL) ==  0 ? 1 : -1];
+char lzcnt64_2[__lzcnt64(0x0000000100000000ULL) == 31 ? 1 : -1];
+
+char lzcntu32_0[_lzcnt_u32(0x00000000) == 32 ? 1 : -1];
+char lzcntu32_1[_lzcnt_u32(0x80000000) ==  0 ? 1 : -1];
+char lzcntu32_2[_lzcnt_u32(0x00000010) == 27 ? 1 : -1];
+
+char lzcntu64_0[_lzcnt_u64(0x0000000000000000ULL) == 64 ? 1 : -1];
+char lzcntu64_1[_lzcnt_u64(0x8000000000000000ULL) ==  0 ? 1 : -1];
+char lzcntu64_2[_lzcnt_u64(0x0000000100000000ULL) == 31 ? 1 : -1];
+#endif
\ No newline at end of file

>From 4aaeb4f986d70cef39017acbf96f0aef6df751b6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Mon, 30 Sep 2024 14:18:57 +0100
Subject: [PATCH 2/2] clang-format headers

---
 clang/lib/Headers/bmiintrin.h   | 3 ++-
 clang/lib/Headers/lzcntintrin.h | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Headers/bmiintrin.h b/clang/lib/Headers/bmiintrin.h
index b0f44367633ce9..634fa39bfa1d7e 100644
--- a/clang/lib/Headers/bmiintrin.h
+++ b/clang/lib/Headers/bmiintrin.h
@@ -18,7 +18,8 @@
    instruction behaves as BSF on non-BMI targets, there is code that expects
    to use it as a potentially faster version of BSF. */
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) constexpr
+#define __RELAXED_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__)) constexpr
 #else
 #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
 #endif
diff --git a/clang/lib/Headers/lzcntintrin.h b/clang/lib/Headers/lzcntintrin.h
index d746d91d9fe5f9..db00474ffd3949 100644
--- a/clang/lib/Headers/lzcntintrin.h
+++ b/clang/lib/Headers/lzcntintrin.h
@@ -16,9 +16,11 @@
 
 /* Define the default attributes for the functions in this file. */
 #if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr
 #else
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __nodebug__, __target__("lzcnt")))
 #endif
 
 #ifndef _MSC_VER



More information about the cfe-commits mailing list