[libcxx-commits] [libcxx] [libc++] Replace __libcpp_{ctz, clz} with __builtin_{ctzg, clzg} (PR #133920)
Peng Liu via libcxx-commits
libcxx-commits at lists.llvm.org
Wed Apr 2 19:42:12 PDT 2025
https://github.com/winner245 updated https://github.com/llvm/llvm-project/pull/133920
>From 60bbd97509b5778b1eab6326c102e6b274adad05 Mon Sep 17 00:00:00 2001
From: Peng Liu <winner245 at hotmail.com>
Date: Tue, 1 Apr 2025 10:45:09 -0400
Subject: [PATCH] Replace __libcpp_{ctz,clz} with __builtin_{ctzg,clzg}
---
libcxx/include/__algorithm/sort.h | 18 +++---
libcxx/include/__bit/countl.h | 62 -------------------
libcxx/include/__bit/countr.h | 49 ---------------
libcxx/include/__bit_reference | 4 +-
libcxx/include/__charconv/to_chars_integral.h | 15 ++---
libcxx/include/__charconv/traits.h | 30 ++++-----
libcxx/include/__hash_table | 2 +-
7 files changed, 33 insertions(+), 147 deletions(-)
diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h
index 4332b62544b40..77f347d37660c 100644
--- a/libcxx/include/__algorithm/sort.h
+++ b/libcxx/include/__algorithm/sort.h
@@ -357,10 +357,10 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos(
// Swap one pair on each iteration as long as both bitsets have at least one
// element for swapping.
while (__left_bitset != 0 && __right_bitset != 0) {
- difference_type __tz_left = __libcpp_ctz(__left_bitset);
- __left_bitset = __libcpp_blsr(__left_bitset);
- difference_type __tz_right = __libcpp_ctz(__right_bitset);
- __right_bitset = __libcpp_blsr(__right_bitset);
+ difference_type __tz_left = std::__countr_zero(__left_bitset);
+ __left_bitset = std::__libcpp_blsr(__left_bitset);
+ difference_type __tz_right = std::__countr_zero(__right_bitset);
+ __right_bitset = std::__libcpp_blsr(__right_bitset);
_Ops::iter_swap(__first + __tz_left, __last - __tz_right);
}
}
@@ -456,7 +456,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
// Swap within the left side. Need to find set positions in the reverse
// order.
while (__left_bitset != 0) {
- difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset);
+ difference_type __tz_left = __detail::__block_size - 1 - std::__countl_zero(__left_bitset);
__left_bitset &= (static_cast<uint64_t>(1) << __tz_left) - 1;
_RandomAccessIterator __it = __first + __tz_left;
if (__it != __lm1) {
@@ -469,7 +469,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within(
// Swap within the right side. Need to find set positions in the reverse
// order.
while (__right_bitset != 0) {
- difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset);
+ difference_type __tz_right = __detail::__block_size - 1 - std::__countl_zero(__right_bitset);
__right_bitset &= (static_cast<uint64_t>(1) << __tz_right) - 1;
_RandomAccessIterator __it = __lm1 - __tz_right;
if (__it != __first) {
@@ -831,11 +831,11 @@ inline _LIBCPP_HIDE_FROM_ABI _Number __log2i(_Number __n) {
if (__n == 0)
return 0;
if (sizeof(__n) <= sizeof(unsigned))
- return sizeof(unsigned) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned>(__n));
+ return sizeof(unsigned) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned>(__n));
if (sizeof(__n) <= sizeof(unsigned long))
- return sizeof(unsigned long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long>(__n));
+ return sizeof(unsigned long) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned long>(__n));
if (sizeof(__n) <= sizeof(unsigned long long))
- return sizeof(unsigned long long) * CHAR_BIT - 1 - __libcpp_clz(static_cast<unsigned long long>(__n));
+ return sizeof(unsigned long long) * CHAR_BIT - 1 - std::__countl_zero(static_cast<unsigned long long>(__n));
_Number __log2 = 0;
while (__n > 1) {
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index d4df1d049b294..9c37749f92577 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -6,9 +6,6 @@
//
//===----------------------------------------------------------------------===//
-// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-// refactor this code to exclusively use __builtin_clzg.
-
#ifndef _LIBCPP___BIT_COUNTL_H
#define _LIBCPP___BIT_COUNTL_H
@@ -27,69 +24,10 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
- return __builtin_clz(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
- return __builtin_clzl(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
- return __builtin_clzll(__x);
-}
-
-#if _LIBCPP_HAS_INT128
-inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT {
-# if __has_builtin(__builtin_clzg)
- return __builtin_clzg(__x);
-# else
- // The function is written in this form due to C++ constexpr limitations.
- // The algorithm:
- // - Test whether any bit in the high 64-bits is set
- // - No bits set:
- // - The high 64-bits contain 64 leading zeros,
- // - Add the result of the low 64-bits.
- // - Any bits set:
- // - The number of leading zeros of the input is the number of leading
- // zeros in the high 64-bits.
- return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
- : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
-# endif
-}
-#endif // _LIBCPP_HAS_INT128
-
template <class _Tp>
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
-#if __has_builtin(__builtin_clzg)
return __builtin_clzg(__t, numeric_limits<_Tp>::digits);
-#else // __has_builtin(__builtin_clzg)
- if (__t == 0)
- return numeric_limits<_Tp>::digits;
-
- if (sizeof(_Tp) <= sizeof(unsigned int))
- return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
- (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
- else if (sizeof(_Tp) <= sizeof(unsigned long))
- return std::__libcpp_clz(static_cast<unsigned long>(__t)) -
- (numeric_limits<unsigned long>::digits - numeric_limits<_Tp>::digits);
- else if (sizeof(_Tp) <= sizeof(unsigned long long))
- return std::__libcpp_clz(static_cast<unsigned long long>(__t)) -
- (numeric_limits<unsigned long long>::digits - numeric_limits<_Tp>::digits);
- else {
- int __ret = 0;
- int __iter = 0;
- const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
- while (true) {
- __t = std::__rotl(__t, __ulldigits);
- if ((__iter = std::__countl_zero(static_cast<unsigned long long>(__t))) != __ulldigits)
- break;
- __ret += __iter;
- }
- return __ret + __iter;
- }
-#endif // __has_builtin(__builtin_clzg)
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 46c43921fc60d..f87175971bed3 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -6,14 +6,10 @@
//
//===----------------------------------------------------------------------===//
-// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
-// refactor this code to exclusively use __builtin_ctzg.
-
#ifndef _LIBCPP___BIT_COUNTR_H
#define _LIBCPP___BIT_COUNTR_H
#include <__assert>
-#include <__bit/rotate.h>
#include <__concepts/arithmetic.h>
#include <__config>
#include <__type_traits/is_unsigned.h>
@@ -28,55 +24,10 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
- return __builtin_ctz(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
- return __builtin_ctzl(__x);
-}
-
-[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
- return __builtin_ctzll(__x);
-}
-
-// A constexpr implementation for C++11 and later (using clang extensions for constexpr support)
-// Precondition: __t != 0 (the caller __countr_zero handles __t == 0 as a special case)
-template <class _Tp>
-[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero_impl(_Tp __t) _NOEXCEPT {
- _LIBCPP_ASSERT_INTERNAL(__t != 0, "__countr_zero_impl called with zero value");
- static_assert(is_unsigned<_Tp>::value, "__countr_zero_impl only works with unsigned types");
- if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned int)) {
- return std::__libcpp_ctz(static_cast<unsigned int>(__t));
- } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long)) {
- return std::__libcpp_ctz(static_cast<unsigned long>(__t));
- } else if _LIBCPP_CONSTEXPR (sizeof(_Tp) <= sizeof(unsigned long long)) {
- return std::__libcpp_ctz(static_cast<unsigned long long>(__t));
- } else {
-#if _LIBCPP_STD_VER == 11
- unsigned long long __ull = static_cast<unsigned long long>(__t);
- const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
- return __ull == 0ull ? __ulldigits + std::__countr_zero_impl<_Tp>(__t >> __ulldigits) : std::__libcpp_ctz(__ull);
-#else
- int __ret = 0;
- const unsigned int __ulldigits = numeric_limits<unsigned long long>::digits;
- while (static_cast<unsigned long long>(__t) == 0uLL) {
- __ret += __ulldigits;
- __t >>= __ulldigits;
- }
- return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
-#endif
- }
-}
-
template <class _Tp>
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __countr_zero(_Tp __t) _NOEXCEPT {
static_assert(is_unsigned<_Tp>::value, "__countr_zero only works with unsigned types");
-#if __has_builtin(__builtin_ctzg) // TODO (LLVM 21): This can be dropped once we only support Clang >= 19.
return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
-#else
- return __t != 0 ? std::__countr_zero_impl(__t) : numeric_limits<_Tp>::digits;
-#endif
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit_reference b/libcxx/include/__bit_reference
index 552e0f5e4d799..a3e6defd405f8 100644
--- a/libcxx/include/__bit_reference
+++ b/libcxx/include/__bit_reference
@@ -165,7 +165,7 @@ public:
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void flip() _NOEXCEPT { *__seg_ ^= __mask_; }
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false> operator&() const _NOEXCEPT {
- return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+ return __bit_iterator<_Cp, false>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
}
private:
@@ -234,7 +234,7 @@ public:
}
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, true> operator&() const _NOEXCEPT {
- return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__libcpp_ctz(__mask_)));
+ return __bit_iterator<_Cp, true>(__seg_, static_cast<unsigned>(std::__countr_zero(__mask_)));
}
private:
diff --git a/libcxx/include/__charconv/to_chars_integral.h b/libcxx/include/__charconv/to_chars_integral.h
index 710299df9b4da..9959d61336833 100644
--- a/libcxx/include/__charconv/to_chars_integral.h
+++ b/libcxx/include/__charconv/to_chars_integral.h
@@ -118,9 +118,8 @@ struct _LIBCPP_HIDDEN __integral<2> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
- // effect since the code scans for the most significant bit set. (Note
- // that __libcpp_clz doesn't work for 0.)
- return numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1);
+ // effect since the code scans for the most significant bit set.
+ return numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1);
}
template <typename _Tp>
@@ -154,9 +153,8 @@ struct _LIBCPP_HIDDEN __integral<8> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
- // effect since the code scans for the most significat bit set. (Note
- // that __libcpp_clz doesn't work for 0.)
- return ((numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1)) + 2) / 3;
+ // effect since the code scans for the most significat bit set.
+ return ((numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1)) + 2) / 3;
}
template <typename _Tp>
@@ -190,9 +188,8 @@ struct _LIBCPP_HIDDEN __integral<16> {
template <typename _Tp>
_LIBCPP_HIDE_FROM_ABI static constexpr int __width(_Tp __value) noexcept {
// If value == 0 still need one digit. If the value != this has no
- // effect since the code scans for the most significat bit set. (Note
- // that __libcpp_clz doesn't work for 0.)
- return (numeric_limits<_Tp>::digits - std::__libcpp_clz(__value | 1) + 3) / 4;
+ // effect since the code scans for the most significat bit set.
+ return (numeric_limits<_Tp>::digits - std::__countl_zero(__value | 1) + 3) / 4;
}
template <typename _Tp>
diff --git a/libcxx/include/__charconv/traits.h b/libcxx/include/__charconv/traits.h
index dd1fa2354436a..46a3a0f3d2e39 100644
--- a/libcxx/include/__charconv/traits.h
+++ b/libcxx/include/__charconv/traits.h
@@ -45,12 +45,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) <= sizeof(uin
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
- /// function requires its input to have at least one bit set the value of
- /// zero is set to one. This means the first element of the lookup table is
- /// zero.
+ /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+ /// used __libcpp_clz. Since that function requires its input to have at
+ /// least one bit set the value of zero is set to one. This means the first
+ /// element of the lookup table is zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
- auto __t = (32 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+ auto __t = (32 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
return __t - (__v < __itoa::__pow10_32[__t]) + 1;
}
@@ -71,12 +71,12 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(uin
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
- /// function requires its input to have at least one bit set the value of
- /// zero is set to one. This means the first element of the lookup table is
- /// zero.
+ /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+ /// used __libcpp_clz. Since that function requires its input to have at
+ /// least one bit set the value of zero is set to one. This means the first
+ /// element of the lookup table is zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
- auto __t = (64 - std::__libcpp_clz(static_cast<type>(__v | 1))) * 1233 >> 12;
+ auto __t = (64 - std::__countl_zero(static_cast<type>(__v | 1))) * 1233 >> 12;
return __t - (__v < __itoa::__pow10_64[__t]) + 1;
}
@@ -98,15 +98,15 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t<sizeof(_Tp) == sizeof(__u
///
/// The algorithm is based on
/// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
- /// Instead of using IntegerLogBase2 it uses __libcpp_clz. Since that
- /// function requires its input to have at least one bit set the value of
- /// zero is set to one. This means the first element of the lookup table is
- /// zero.
+ /// Instead of using IntegerLogBase2 it uses __countl_zero. Previously, it
+ /// used __libcpp_clz. Since that function requires its input to have at
+ /// least one bit set the value of zero is set to one. This means the first
+ /// element of the lookup table is zero.
static _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI int __width(_Tp __v) {
_LIBCPP_ASSERT_INTERNAL(
__v > numeric_limits<uint64_t>::max(), "The optimizations for this algorithm fail when this isn't true.");
// There's always a bit set in the upper 64-bits.
- auto __t = (128 - std::__libcpp_clz(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
+ auto __t = (128 - std::__countl_zero(static_cast<uint64_t>(__v >> 64))) * 1233 >> 12;
_LIBCPP_ASSERT_INTERNAL(__t >= __itoa::__pow10_128_offset, "Index out of bounds");
// __t is adjusted since the lookup table misses the lower entries.
return __t - (__v < __itoa::__pow10_128[__t - __itoa::__pow10_128_offset]) + 1;
diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index d7b312f8774fc..0860c53f84636 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -147,7 +147,7 @@ inline _LIBCPP_HIDE_FROM_ABI size_t __constrain_hash(size_t __h, size_t __bc) {
}
inline _LIBCPP_HIDE_FROM_ABI size_t __next_hash_pow2(size_t __n) {
- return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - __libcpp_clz(__n - 1)));
+ return __n < 2 ? __n : (size_t(1) << (numeric_limits<size_t>::digits - std::__countl_zero(__n - 1)));
}
template <class _Tp, class _Hash, class _Equal, class _Alloc>
More information about the libcxx-commits
mailing list