[libcxx-commits] [libcxx] [libcxx] Use generic builtins for popcount, clz and ctz (PR #86563)

Marc Auberer via libcxx-commits libcxx-commits at lists.llvm.org
Mon Mar 25 15:49:02 PDT 2024


https://github.com/marcauberer updated https://github.com/llvm/llvm-project/pull/86563

>From 9d22aef5c56f36de60248b37933fd6e674f28f9c Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Mon, 25 Mar 2024 19:53:01 +0100
Subject: [PATCH 1/3] [libcxx] Use generic builtins for popcount, clz and ctz

Use __builtin_popcountg instead of __buildin_popcount{l|ll}
Use __builtin_clzg instead of __buildin_clz{l|ll}
Use __builtin_ctzg instead of __builtin_ctz{l|ll}
---
 libcxx/include/__bit/countl.h           | 16 ++++++++++++++++
 libcxx/include/__bit/countr.h           | 12 ++++++++++++
 libcxx/include/__bit/popcount.h         | 12 ++++++++++++
 libcxx/src/include/ryu/d2s_intrinsics.h |  2 +-
 libcxx/src/include/ryu/ryu.h            |  5 +++--
 libcxx/src/ryu/f2s.cpp                  |  2 +-
 6 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index 396cfc2c3f4064..bd87f8903787ce 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -25,15 +25,27 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#else
   return __builtin_clz(__x);
+#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#else
   return __builtin_clzl(__x);
+#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#else
   return __builtin_clzll(__x);
+#endif
 }
 
 #ifndef _LIBCPP_HAS_NO_INT128
@@ -47,8 +59,12 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
   // - Any bits set:
   //   - The number of leading zeros of the input is the number of leading
   //     zeros in the high 64-bits.
+#  if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#  else
   return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
                             : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
+#  endif
 }
 #endif // _LIBCPP_HAS_NO_INT128
 
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index b6b3ac52ca4e47..af347688888739 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+  return __builtin_ctzg(__x);
+#else
   return __builtin_ctz(__x);
+#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+  return __builtin_ctzg(__x);
+#else
   return __builtin_ctzl(__x);
+#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+  return __builtin_ctzg(__x);
+#else
   return __builtin_ctzll(__x);
+#endif
 }
 
 template <class _Tp>
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index b0319cef251894..85ba84a572dd83 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+  return __builtin_popcountg(__x);
+#else
   return __builtin_popcount(__x);
+#endif
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+  return __builtin_popcountg(__x);
+#else
   return __builtin_popcountl(__x);
+#endif
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+  return __builtin_popcountg(__x);
+#else
   return __builtin_popcountll(__x);
+#endif
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h
index be50361fb3b334..afe64649a0be1c 100644
--- a/libcxx/src/include/ryu/d2s_intrinsics.h
+++ b/libcxx/src/include/ryu/d2s_intrinsics.h
@@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
   _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
-  // __builtin_ctzll doesn't appear to be faster here.
+  // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here.
   return (__value & ((1ull << __p) - 1)) == 0;
 }
 
diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h
index 7b19ecfec5915a..85831bed61b21c 100644
--- a/libcxx/src/include/ryu/ryu.h
+++ b/libcxx/src/include/ryu/ryu.h
@@ -43,6 +43,7 @@
 // Avoid formatting to keep the changes with the original code minimal.
 // clang-format off
 
+#include <__bit/countr.h>
 #include <__charconv/chars_format.h>
 #include <__charconv/to_chars_result.h>
 #include <__config>
@@ -72,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in
   if (__mask == 0) {
     return false;
   }
-  *__index = __builtin_ctzll(__mask);
+  *__index = __libcpp_ctz(__mask);
   return true;
 }
 
@@ -80,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde
   if (__mask == 0) {
     return false;
   }
-  *__index = __builtin_ctz(__mask);
+  *__index = __libcpp_ctz(__mask);
   return true;
 }
 #endif  // !_MSC_VER
diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp
index f42fbd68c91d2d..e7b5d39669f990 100644
--- a/libcxx/src/ryu/f2s.cpp
+++ b/libcxx/src/ryu/f2s.cpp
@@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
   _LIBCPP_ASSERT_INTERNAL(__p < 32, "");
-  // __builtin_ctz doesn't appear to be faster here.
+  // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here.
   return (__value & ((1u << __p) - 1)) == 0;
 }
 

>From 89eaec394468d493da57c4a602b00ffea3eaef66 Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Mon, 25 Mar 2024 22:30:22 +0100
Subject: [PATCH 2/3] Shortcuts that benefit from generic builtins

---
 libcxx/include/__bit/countl.h   | 4 ++++
 libcxx/include/__bit/countr.h   | 4 ++++
 libcxx/include/__bit/popcount.h | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index bd87f8903787ce..af0409950837b8 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -74,6 +74,9 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__t) - (numeric_limits<unsigned>::digits - numeric_limits<_Tp>::digits);
+#else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
            (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
@@ -95,6 +98,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
     }
     return __ret + __iter;
   }
+#endif // __has_builtin(__builtin_clzg)
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index af347688888739..6c2b2a74ee7ac7 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -52,6 +52,9 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
+#if __has_builtin(__builtin_ctz)
+  return __builtin_ctz(__t);
+#else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_ctz(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -67,6 +70,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
     }
     return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
   }
+#endif // __has_builtin(__builtin_ctz)
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index 85ba84a572dd83..63e0aadec0496e 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -51,6 +51,9 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo
 
 template <__libcpp_unsigned_integer _Tp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
+#  if __has_builtin(__builtin_popcount)
+  return __builtin_popcount(__t);
+#  else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_popcount(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -65,6 +68,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex
     }
     return __ret;
   }
+#  endif // __has_builtin(__builtin_popcount)
 }
 
 #endif // _LIBCPP_STD_VER >= 20

>From 21e48444c3069627abda136f3feaa3cf00e08602 Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Mon, 25 Mar 2024 23:32:59 +0100
Subject: [PATCH 3/3] Simplify

---
 libcxx/include/__algorithm/count.h            |  6 ++--
 libcxx/include/__algorithm/find.h             |  6 ++--
 libcxx/include/__algorithm/sort.h             | 14 ++++----
 libcxx/include/__bit/countl.h                 | 36 +++++++------------
 libcxx/include/__bit/countr.h                 | 28 ++++++---------
 libcxx/include/__bit/popcount.h               | 35 ++++++++----------
 libcxx/include/__bit_reference                |  4 +--
 libcxx/include/__charconv/to_chars_integral.h | 12 +++----
 libcxx/include/__charconv/traits.h            | 12 +++----
 libcxx/include/__hash_table                   |  2 +-
 .../include/__stop_token/atomic_unique_lock.h |  2 +-
 libcxx/src/include/ryu/ryu.h                  |  4 +--
 12 files changed, 69 insertions(+), 92 deletions(-)

diff --git a/libcxx/include/__algorithm/count.h b/libcxx/include/__algorithm/count.h
index 23a7d3c4dcfed6..cd6fd984333341 100644
--- a/libcxx/include/__algorithm/count.h
+++ b/libcxx/include/__algorithm/count.h
@@ -55,17 +55,17 @@ __count_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
     __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
     __storage_type __dn    = std::min(__clz_f, __n);
     __storage_type __m     = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
-    __r                    = std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
+    __r                    = std::popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
     __n -= __dn;
     ++__first.__seg_;
   }
   // do middle whole words
   for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word)
-    __r += std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_));
+    __r += std::popcount(std::__invert_if<!_ToCount>(*__first.__seg_));
   // do last partial word
   if (__n > 0) {
     __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
-    __r += std::__libcpp_popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
+    __r += std::popcount(std::__invert_if<!_ToCount>(*__first.__seg_) & __m);
   }
   return __r;
 }
diff --git a/libcxx/include/__algorithm/find.h b/libcxx/include/__algorithm/find.h
index 7d7631b6e98a96..4cac8f77c9eea6 100644
--- a/libcxx/include/__algorithm/find.h
+++ b/libcxx/include/__algorithm/find.h
@@ -110,7 +110,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
     __storage_type __m     = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
     __storage_type __b     = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
     if (__b)
-      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
+      return _It(__first.__seg_, static_cast<unsigned>(std::__countr_zero(__b)));
     if (__n == __dn)
       return __first + __n;
     __n -= __dn;
@@ -120,14 +120,14 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
   for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) {
     __storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_);
     if (__b)
-      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
+      return _It(__first.__seg_, static_cast<unsigned>(std::__countr_zero(__b)));
   }
   // do last partial word
   if (__n > 0) {
     __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
     __storage_type __b = std::__invert_if<!_ToFind>(*__first.__seg_) & __m;
     if (__b)
-      return _It(__first.__seg_, static_cast<unsigned>(std::__libcpp_ctz(__b)));
+      return _It(__first.__seg_, static_cast<unsigned>(std::__countr_zero(__b)));
   }
   return _It(__first.__seg_, static_cast<unsigned>(__n));
 }
diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
index 8a5e0211cdf4c1..46eb164e481e05 100644
--- a/libcxx/include/__algorithm/sort.h
+++ b/libcxx/include/__algorithm/sort.h
@@ -399,9 +399,9 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
   // Swap one pair on each iteration as long as both bitsets have at least one
   // element for swapping.
   while (__left_bitset != 0 && __right_bitset != 0) {
-    difference_type __tz_left  = __libcpp_ctz(__left_bitset);
+    difference_type __tz_left  = __countr_zero(__left_bitset);
     __left_bitset              = __libcpp_blsr(__left_bitset);
-    difference_type __tz_right = __libcpp_ctz(__right_bitset);
+    difference_type __tz_right = __countr_zero(__right_bitset);
     __right_bitset             = __libcpp_blsr(__right_bitset);
     _Ops::iter_swap(__first + __tz_left, __last - __tz_right);
   }
@@ -498,7 +498,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
     // Swap within the left side.  Need to find set positions in the reverse
     // order.
     while (__left_bitset != 0) {
-      difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
+      difference_type __tz_left = __detail::__block_size - 1 - __countl_zero(__left_bitset);
       __left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
       _RandomAccessIterator __it = __first + __tz_left;
       if (__it != __lm1) {
@@ -511,7 +511,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
     // Swap within the right side.  Need to find set positions in the reverse
     // order.
     while (__right_bitset != 0) {
-      difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
+      difference_type __tz_right = __detail::__block_size - 1 - __countl_zero(__right_bitset);
       __right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
       _RandomAccessIterator __it = __lm1 - __tz_right;
       if (__it != __first) {
@@ -874,11 +874,11 @@ inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
   if (__n == 0)
     return 0;
   if (sizeof(__n) <= sizeof(unsigned))
-    return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
+    return sizeof(unsigned) * CHAR_BIT - 1 - __countl_zero(static_cast<unsigned>(__n));
   if (sizeof(__n) <= sizeof(unsigned long))
-    return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
+    return sizeof(unsigned long) * CHAR_BIT - 1 - __countl_zero(static_cast<unsigned long>(__n));
   if (sizeof(__n) <= sizeof(unsigned long long))
-    return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
+    return sizeof(unsigned long long) * CHAR_BIT - 1 - __countl_zero(static_cast<unsigned long long>(__n));
 
   _Number __log2 = 0;
   while (__n > 1) {
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index af0409950837b8..d836bb7d89bb14 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -24,31 +24,27 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
 #if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
+  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
+  return __builtin_clzg(__x, numeric_limits<_Tp>::digits);
+}
 #else
+
+_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
   return __builtin_clz(__x);
-#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#else
   return __builtin_clzl(__x);
-#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#else
   return __builtin_clzll(__x);
-#endif
 }
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#  ifndef _LIBCPP_HAS_NO_INT128
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT {
   // The function is written in this form due to C++ constexpr limitations.
   // The algorithm:
@@ -59,14 +55,10 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
   // - Any bits set:
   //   - The number of leading zeros of the input is the number of leading
   //     zeros in the high 64-bits.
-#  if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#  else
   return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
                             : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
-#  endif
 }
-#endif // _LIBCPP_HAS_NO_INT128
+#  endif // _LIBCPP_HAS_NO_INT128
 
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
@@ -74,9 +66,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
-#if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__t) - (numeric_limits<unsigned>::digits - numeric_limits<_Tp>::digits);
-#else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
            (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
@@ -98,10 +87,9 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
     }
     return __ret + __iter;
   }
-#endif // __has_builtin(__builtin_clzg)
 }
 
-#if _LIBCPP_STD_VER >= 20
+#  if _LIBCPP_STD_VER >= 20
 
 template <__libcpp_unsigned_integer _Tp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countl_zero(_Tp __t) noexcept {
@@ -113,7 +101,9 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countl_one(_Tp __t) no
   return __t != numeric_limits<_Tp>::max() ? std::countl_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits;
 }
 
-#endif // _LIBCPP_STD_VER >= 20
+#  endif // _LIBCPP_STD_VER >= 20
+
+#endif // __has_builtin(__builtin_clzg)
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 6c2b2a74ee7ac7..80e170583d8bff 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -23,28 +23,24 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
 #if __has_builtin(__builtin_ctzg)
-  return __builtin_ctzg(__x);
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
+  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countr_zero requires an unsigned integer type");
+  return __builtin_ctzg(__x, numeric_limits<_Tp>::digits);
+}
 #else
+
+_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
   return __builtin_ctz(__x);
-#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_ctzg)
-  return __builtin_ctzg(__x);
-#else
   return __builtin_ctzl(__x);
-#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_ctzg)
-  return __builtin_ctzg(__x);
-#else
   return __builtin_ctzll(__x);
-#endif
 }
 
 template <class _Tp>
@@ -52,9 +48,6 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
-#if __has_builtin(__builtin_ctz)
-  return __builtin_ctz(__t);
-#else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_ctz(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -70,10 +63,9 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
     }
     return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
   }
-#endif // __has_builtin(__builtin_ctz)
 }
 
-#if _LIBCPP_STD_VER >= 20
+#  if _LIBCPP_STD_VER >= 20
 
 template <__libcpp_unsigned_integer _Tp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_zero(_Tp __t) noexcept {
@@ -85,7 +77,9 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int countr_one(_Tp __t) no
   return __t != numeric_limits<_Tp>::max() ? std::countr_zero(static_cast<_Tp>(~__t)) : numeric_limits<_Tp>::digits;
 }
 
-#endif // _LIBCPP_STD_VER >= 20
+#  endif // _LIBCPP_STD_VER >= 20
+
+#endif // __has_builtin(__builtin_ctzg)
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index 63e0aadec0496e..79eef1bf7ea7d3 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -23,55 +23,48 @@ _LIBCPP_PUSH_MACROS
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
 #if __has_builtin(__builtin_popcountg)
-  return __builtin_popcountg(__x);
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int popcount(_Tp __t) _NOEXCEPT {
+  return __builtin_popcountg(__x, numeric_limits<_Tp>::digits);
+}
 #else
+
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
   return __builtin_popcount(__x);
-#endif
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_popcountg)
-  return __builtin_popcountg(__x);
-#else
   return __builtin_popcountl(__x);
-#endif
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_popcountg)
-  return __builtin_popcountg(__x);
-#else
   return __builtin_popcountll(__x);
-#endif
 }
 
-#if _LIBCPP_STD_VER >= 20
+#  if _LIBCPP_STD_VER >= 20
 
 template <__libcpp_unsigned_integer _Tp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
-#  if __has_builtin(__builtin_popcount)
-  return __builtin_popcount(__t);
-#  else
   if (sizeof(_Tp) <= sizeof(unsigned int))
-    return std::__libcpp_popcount(static_cast<unsigned int>(__t));
+    return std::popcount(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
-    return std::__libcpp_popcount(static_cast<unsigned long>(__t));
+    return std::popcount(static_cast<unsigned long>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long long))
-    return std::__libcpp_popcount(static_cast<unsigned long long>(__t));
+    return std::popcount(static_cast<unsigned long long>(__t));
   else {
     int __ret = 0;
     while (__t != 0) {
-      __ret += std::__libcpp_popcount(static_cast<unsigned long long>(__t));
+      __ret += std::popcount(static_cast<unsigned long long>(__t));
       __t >>= numeric_limits<unsigned long long>::digits;
     }
     return __ret;
   }
-#  endif // __has_builtin(__builtin_popcount)
 }
 
-#endif // _LIBCPP_STD_VER >= 20
+#  endif // _LIBCPP_STD_VER >= 20
+
+#endif // __has_builtin(__builtin_popcountg)
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 9579b9eaf70bbd..f937268c9ef2d4 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -91,7 +91,7 @@ public:
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {
-    return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+    return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
   }
 
 private:
@@ -159,7 +159,7 @@ public:
   }
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {
-    return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+    return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
   }
 
 private:
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
index 0369f4dfb9bda6..1149c91d3beeb7 100644
--- a/libcxx/include/__charconv/to_chars_integral.h
+++ b/libcxx/include/__charconv/to_chars_integral.h
@@ -118,8 +118,8 @@ struct _LIBCPP_HIDDEN __integral<2> {
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significant bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1);
+    // that __countl_zero doesn't work for 0.)
+    return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1);
   }
 
   template <typename _Tp>
@@ -154,8 +154,8 @@ struct _LIBCPP_HIDDEN __integral<8> {
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significat bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3;
+    // that __countl_zero doesn't work for 0.)
+    return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3;
   }
 
   template <typename _Tp>
@@ -190,8 +190,8 @@ struct _LIBCPP_HIDDEN __integral<16> {
   _LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
     // If value == 0 still need one digit. If the value != this has no
     // effect since the code scans for the most significat bit set. (Note
-    // that __libcpp_clz doesn't work for 0.)
-    return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4;
+    // that __countl_zero doesn't work for 0.)
+    return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4;
   }
 
   template <typename _Tp>
diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h
index c91c6da3247978..c01c9f0ecc726e 100644
--- a/libcxx/include/__charconv/traits.h
+++ b/libcxx/include/__charconv/traits.h
@@ -44,12 +44,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. Since that
   /// function requires its input to have at least one bit set the value of
   /// zero is set to one. This means the first element of the lookup table is
   /// zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
-    auto __t = (32 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+    auto __t = (32 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
     return __t - (__v < __itoa::__pow10_32[__t]) + 1;
   }
 
@@ -70,12 +70,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uin
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. Since that
   /// function requires its input to have at least one bit set the value of
   /// zero is set to one. This means the first element of the lookup table is
   /// zero.
   static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
-    auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+    auto __t = (64 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
     return __t - (__v < __itoa::__pow10_64[__t]) + 1;
   }
 
@@ -97,7 +97,7 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
   ///
   /// The algorithm is based on
   /// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
-  /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
+  /// Instead of using IntegerLogBase2 it uses __countl_zero. Since that
   /// function requires its input to have at least one bit set the value of
   /// zero is set to one. This means the first element of the lookup table is
   /// zero.
@@ -105,7 +105,7 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
     _LIBCPP_ASSERT_INTERNAL(
         __v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true.");
     // There's always a bit set in the upper 64-bits.
-    auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
+    auto __t = (128 - std::__countl_zero(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
     _LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds");
     // __t is adjusted since the lookup table misses the lower entries.
     return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1;
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index a705117d0173f9..6a90659e381748 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -139,7 +139,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {
 }
 
 inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {
-  return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));
+  return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __countl_zero(__n - 1)));
 }
 
 template <class _Tp, class _Hash, class _Equal, class _Alloc>
diff --git a/libcxx/include/__stop_token/atomic_unique_lock.h b/libcxx/include/__stop_token/atomic_unique_lock.h
index 13e59f9f0dce00..f5dcbfca7ddfdd 100644
--- a/libcxx/include/__stop_token/atomic_unique_lock.h
+++ b/libcxx/include/__stop_token/atomic_unique_lock.h
@@ -28,7 +28,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // and LockedBit is the value of State when the lock bit is set, e.g  1 << 2
 template <class _State, _State _LockedBit>
 class _LIBCPP_AVAILABILITY_SYNC __atomic_unique_lock {
-  static_assert(std::__libcpp_popcount(static_cast<unsigned long long>(_LockedBit)) == 1,
+  static_assert(std::popcount(static_cast<unsigned long long>(_LockedBit)) == 1,
                 "LockedBit must be an integer where only one bit is set");
 
   std::atomic<_State>& __state_;
diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h
index 85831bed61b21c..d58fabe72c849e 100644
--- a/libcxx/src/include/ryu/ryu.h
+++ b/libcxx/src/include/ryu/ryu.h
@@ -73,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in
   if (__mask == 0) {
     return false;
   }
-  *__index = __libcpp_ctz(__mask);
+  *__index = __countr_zero(__mask);
   return true;
 }
 
@@ -81,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde
   if (__mask == 0) {
     return false;
   }
-  *__index = __libcpp_ctz(__mask);
+  *__index = __countr_zero(__mask);
   return true;
 }
 #endif  // !_MSC_VER



More information about the libcxx-commits mailing list