[libcxx-commits] [libcxx] [libc++] Replace __libcpp_{ctz, clz} with __builtin_{ctzg, clzg} (PR #133920)

Wed Apr 2 19:42:12 PDT 2025

https://github.com/winner245 updated https://github.com/llvm/llvm-project/pull/133920

>From 60bbd97509b5778b1eab6326c102e6b274adad05 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Tue, 1 Apr 2025 10:45:09 -0400
Subject: [PATCH] Replace __libcpp_{ctz,clz} with __builtin_{ctzg,clzg}

---
 libcxx/include/__algorithm/sort.h             | 18 +++---
 libcxx/include/__bit/countl.h                 | 62 -------------------
 libcxx/include/__bit/countr.h                 | 49 ---------------
 libcxx/include/__bit_reference                |  4 +-
 libcxx/include/__charconv/to_chars_integral.h | 15 ++---
 libcxx/include/__charconv/traits.h            | 30 ++++-----
 libcxx/include/__hash_table                   |  2 +-
 7 files changed, 33 insertions(+), 147 deletions(-)

diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
index 4332b62544b40..77f347d37660c 100644
--- a/libcxx/include/__algorithm/sort.h
+++ b/libcxx/include/__algorithm/sort.h
@@ -357,10 +357,10 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
   // Swap one pair on each iteration as long as both bitsets have at least one
   // element for swapping.
   while (__left_bitset != 0 && __right_bitset != 0) {
-    difference_type __tz_left  = __libcpp_ctz(__left_bitset);
-    __left_bitset              = __libcpp_blsr(__left_bitset);
-    difference_type __tz_right = __libcpp_ctz(__right_bitset);
-    __right_bitset             = __libcpp_blsr(__right_bitset);
+    difference_type __tz_left  = std::__countr_zero(__left_bitset);
+    __left_bitset              = std::__libcpp_blsr(__left_bitset);
+    difference_type __tz_right = std::__countr_zero(__right_bitset);
+    __right_bitset             = std::__libcpp_blsr(__right_bitset);
     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
   }
 }
@@ -456,7 +456,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
     // Swap within the left side.  Need to find set positions in the reverse
     // order.
     while (__left_bitset != 0) {
-      difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
+      difference_type __tz_left = __detail::__block_size - 1 - std::__countl_zero(__left_bitset);
       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
       _RandomAccessIterator __it = __first + __tz_left;
       if (__it != __lm1) {
@@ -469,7 +469,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
     // Swap within the right side.  Need to find set positions in the reverse
     // order.
     while (__right_bitset != 0) {
-      difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
+      difference_type __tz_right = __detail::__block_size - 1 - std::__countl_zero(__right_bitset);
       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
       _RandomAccessIterator __it = __lm1 - __tz_right;
       if (__it != __first) {
@@ -831,11 +831,11 @@ inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
   if (__n == 0)
     return 0;
   if (sizeof(__n) <= sizeof(unsigned))
-    return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
+    return sizeof(unsigned) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned>(__n));
   if (sizeof(__n) <= sizeof(unsigned long))
-    return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
+    return sizeof(unsigned long) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned long>(__n));
   if (sizeof(__n) <= sizeof(unsigned long long))
-    return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
+    return sizeof(unsigned long long) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned long long>(__n));
 
   _Number __log2 = 0;
   while (__n > 1) {
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index d4df1d049b294..9c37749f92577 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -6,9 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-//  refactor this code to exclusively use __builtin_clzg.
-
 #ifndef _LIBCPP___BIT_COUNTL_H
 #define _LIBCPP___BIT_COUNTL_H
 
@@ -27,69 +24,10 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
-  return __builtin_clz(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
-  return __builtin_clzl(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
-  return __builtin_clzll(__x);
-}
-
-#if _LIBCPP_HAS_INT128
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT {
-#  if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#  else
-  // The function is written in this form due to C++ constexpr limitations.
-  // The algorithm:
-  // - Test whether any bit in the high 64-bits is set
-  // - No bits set:
-  //   - The high 64-bits contain 64 leading zeros,
-  //   - Add the result of the low 64-bits.
-  // - Any bits set:
-  //   - The number of leading zeros of the input is the number of leading
-  //     zeros in the high 64-bits.
-  return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
-                            : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
-#  endif
-}
-#endif // _LIBCPP_HAS_INT128
-
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
   static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
-#if __has_builtin(__builtin_clzg)
   return __builtin_clzg(__t, numeric_limits<_Tp>::digits);
-#else  // __has_builtin(__builtin_clzg)
-  if (__t == 0)
-    return numeric_limits<_Tp>::digits;
-
-  if (sizeof(_Tp) <= sizeof(unsigned int))
-    return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
-           (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
-  else if (sizeof(_Tp) <= sizeof(unsigned long))
-    return std::__libcpp_clz(static_cast<unsigned long>(__t)) -
-           (numeric_limits<unsigned long>::digits - numeric_limits<_Tp>::digits);
-  else if (sizeof(_Tp) <= sizeof(unsigned long long))
-    return std::__libcpp_clz(static_cast<unsigned long long>(__t)) -
-           (numeric_limits<unsigned long long>::digits - numeric_limits<_Tp>::digits);
-  else {
-    int __ret                      = 0;
-    int __iter                     = 0;
-    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
-    while (true) {
-      __t = std::__rotl(__t, __ulldigits);
-      if ((__iter = std::__countl_zero(static_cast<unsigned long long>(__t))) != __ulldigits)
-        break;
-      __ret += __iter;
-    }
-    return __ret + __iter;
-  }
-#endif // __has_builtin(__builtin_clzg)
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 46c43921fc60d..f87175971bed3 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -6,14 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-//  refactor this code to exclusively use __builtin_ctzg.
-
 #ifndef _LIBCPP___BIT_COUNTR_H
 #define _LIBCPP___BIT_COUNTR_H
 
 #include <__assert>
-#include <__bit/rotate.h>
 #include <__concepts/arithmetic.h>
 #include <__config>
 #include <__type_traits/is_unsigned.h>
@@ -28,55 +24,10 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
-  return __builtin_ctz(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
-  return __builtin_ctzl(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
-  return __builtin_ctzll(__x);
-}
-
-// A constexpr implementation for C++11 and later (using clang extensions for constexpr support)
-// Precondition: __t != 0 (the caller __countr_zero handles __t == 0 as a special case)
-template <class _Tp>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero_impl(_Tp __t) _NOEXCEPT {
-  _LIBCPP_ASSERT_INTERNAL(__t != 0, "__countr_zero_impl called with zero value");
-  static_assert(is_unsigned<_Tp>::value, "__countr_zero_impl only works with unsigned types");
-  if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) {
-    return std::__libcpp_ctz(static_cast<unsigned int>(__t));
-  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) {
-    return std::__libcpp_ctz(static_cast<unsigned long>(__t));
-  } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) {
-    return std::__libcpp_ctz(static_cast<unsigned long long>(__t));
-  } else {
-#if _LIBCPP_STD_VER == 11
-    unsigned long long __ull       = static_cast<unsigned long long>(__t);
-    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
-    return __ull == 0ull ? __ulldigits + std::__countr_zero_impl<_Tp>(__t >> __ulldigits) : std::__libcpp_ctz(__ull);
-#else
-    int __ret                      = 0;
-    const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
-    while (static_cast<unsigned long long>(__t) == 0uLL) {
-      __ret += __ulldigits;
-      __t >>= __ulldigits;
-    }
-    return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
-#endif
-  }
-}
-
 template <class _Tp>
 [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT {
   static_assert(is_unsigned<_Tp>::value, "__countr_zero only works with unsigned types");
-#if __has_builtin(__builtin_ctzg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19.
   return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
-#else
-  return __t != 0 ? std::__countr_zero_impl(__t) : numeric_limits<_Tp>::digits;
-#endif
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 552e0f5e4d799..a3e6defd405f8 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -165,7 +165,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {
-    return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+    return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
   }
 
 private:
@@ -234,7 +234,7 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {
-    return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+    return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
   }
 
 private:
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
index 710299df9b4da..9959d61336833 100644
--- a/libcxx/include/__charconv/to_chars_integral.h
+++ b/libcxx/include/__charconv/to_chars_integral.h
@@ -118,9 +118,8 @@ struct _LIBCPP_HIDDEN __integral<2> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significant bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1);
+    // effect since the code scans for the most significant bit set.
+    return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1);
   }
 
   template <typename _Tp>
@@ -154,9 +153,8 @@ struct _LIBCPP_HIDDEN __integral<8> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significat bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3;
+    // effect since the code scans for the most significat bit set.
+    return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3;
   }
 
   template <typename _Tp>
@@ -190,9 +188,8 @@ struct _LIBCPP_HIDDEN __integral<16> {
   template <typename _Tp>
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
-    // effect since the code scans for the most significat bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4;
+    // effect since the code scans for the most significat bit set.
+    return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4;
   }
 
   template <typename _Tp>
diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h
index dd1fa2354436a..46a3a0f3d2e39 100644
--- a/libcxx/include/__charconv/traits.h
+++ b/libcxx/include/__charconv/traits.h
@@ -45,12 +45,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
-  /// function requires its input to have at least one bit set the value of
-  /// zero is set to one. This means the first element of the lookup table is
-  /// zero.
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+  /// used __libcpp_clz. Since that function requires its input to have at
+  /// least one bit set the value of zero is set to one. This means the first
+  /// element of the lookup table is zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
-    auto __t = (32 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+    auto __t = (32 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
     return __t - (__v < __itoa::__pow10_32[__t]) + 1;
   }
 
@@ -71,12 +71,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uin
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
-  /// function requires its input to have at least one bit set the value of
-  /// zero is set to one. This means the first element of the lookup table is
-  /// zero.
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+  /// used __libcpp_clz. Since that function requires its input to have at
+  /// least one bit set the value of zero is set to one. This means the first
+  /// element of the lookup table is zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
-    auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+    auto __t = (64 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
     return __t - (__v < __itoa::__pow10_64[__t]) + 1;
   }
 
@@ -98,15 +98,15 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
-  /// function requires its input to have at least one bit set the value of
-  /// zero is set to one. This means the first element of the lookup table is
-  /// zero.
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+  /// used __libcpp_clz. Since that function requires its input to have at
+  /// least one bit set the value of zero is set to one. This means the first
+  /// element of the lookup table is zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
     _LIBCPP_ASSERT_INTERNAL(
         __v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true.");
     // There's always a bit set in the upper 64-bits.
-    auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
+    auto __t = (128 - std::__countl_zero(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
     _LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds");
     // __t is adjusted since the lookup table misses the lower entries.
     return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1;
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d7b312f8774fc..0860c53f84636 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {
-  return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));
+  return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - std::__countl_zero(__n - 1)));
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>