[libcxx-commits] [libcxx] [libcxx] Use generic builtins for popcount, clz and ctz (PR #86563)

Marc Auberer via libcxx-commits libcxx-commits at lists.llvm.org
Thu Apr 11 01:08:28 PDT 2024


https://github.com/marcauberer updated https://github.com/llvm/llvm-project/pull/86563

>From f4ca119746c5f6f41f87a7db4f7b4fbdb223b99a Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Mon, 25 Mar 2024 19:53:01 +0100
Subject: [PATCH 1/7] [libcxx] Use generic builtins for popcount, clz and ctz

Use __builtin_popcountg instead of __buildin_popcount{l|ll}
Use __builtin_clzg instead of __buildin_clz{l|ll}
Use __builtin_ctzg instead of __builtin_ctz{l|ll}
---
 libcxx/include/__bit/countl.h           | 16 ++++++++++++++++
 libcxx/include/__bit/countr.h           | 12 ++++++++++++
 libcxx/include/__bit/popcount.h         | 12 ++++++++++++
 libcxx/src/include/ryu/d2s_intrinsics.h |  2 +-
 libcxx/src/include/ryu/ryu.h            |  5 +++--
 libcxx/src/ryu/f2s.cpp                  |  2 +-
 6 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index 396cfc2c3f4064..bd87f8903787ce 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -25,15 +25,27 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#else
   return __builtin_clz(__x);
+#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#else
   return __builtin_clzl(__x);
+#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#else
   return __builtin_clzll(__x);
+#endif
 }
 
 #ifndef _LIBCPP_HAS_NO_INT128
@@ -47,8 +59,12 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
   // - Any bits set:
   //   - The number of leading zeros of the input is the number of leading
   //     zeros in the high 64-bits.
+#  if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#  else
   return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
                             : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
+#  endif
 }
 #endif // _LIBCPP_HAS_NO_INT128
 
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index b6b3ac52ca4e47..af347688888739 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+  return __builtin_ctzg(__x);
+#else
   return __builtin_ctz(__x);
+#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+  return __builtin_ctzg(__x);
+#else
   return __builtin_ctzl(__x);
+#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+  return __builtin_ctzg(__x);
+#else
   return __builtin_ctzll(__x);
+#endif
 }
 
 template <class _Tp>
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index b0319cef251894..85ba84a572dd83 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+  return __builtin_popcountg(__x);
+#else
   return __builtin_popcount(__x);
+#endif
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+  return __builtin_popcountg(__x);
+#else
   return __builtin_popcountl(__x);
+#endif
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+  return __builtin_popcountg(__x);
+#else
   return __builtin_popcountll(__x);
+#endif
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h
index be50361fb3b334..afe64649a0be1c 100644
--- a/libcxx/src/include/ryu/d2s_intrinsics.h
+++ b/libcxx/src/include/ryu/d2s_intrinsics.h
@@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
   _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
-  // __builtin_ctzll doesn't appear to be faster here.
+  // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here.
   return (__value & ((1ull << __p) - 1)) == 0;
 }
 
diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h
index 7b19ecfec5915a..85831bed61b21c 100644
--- a/libcxx/src/include/ryu/ryu.h
+++ b/libcxx/src/include/ryu/ryu.h
@@ -43,6 +43,7 @@
 // Avoid formatting to keep the changes with the original code minimal.
 // clang-format off
 
+#include <__bit/countr.h>
 #include <__charconv/chars_format.h>
 #include <__charconv/to_chars_result.h>
 #include <__config>
@@ -72,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in
   if (__mask == 0) {
     return false;
   }
-  *__index = __builtin_ctzll(__mask);
+  *__index = __libcpp_ctz(__mask);
   return true;
 }
 
@@ -80,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde
   if (__mask == 0) {
     return false;
   }
-  *__index = __builtin_ctz(__mask);
+  *__index = __libcpp_ctz(__mask);
   return true;
 }
 #endif  // !_MSC_VER
diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp
index f42fbd68c91d2d..e7b5d39669f990 100644
--- a/libcxx/src/ryu/f2s.cpp
+++ b/libcxx/src/ryu/f2s.cpp
@@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
   _LIBCPP_ASSERT_INTERNAL(__p < 32, "");
-  // __builtin_ctz doesn't appear to be faster here.
+  // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here.
   return (__value & ((1u << __p) - 1)) == 0;
 }
 

>From 35355091bf4545384b0750f56d1ee67f7e0f4a81 Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Mon, 25 Mar 2024 22:30:22 +0100
Subject: [PATCH 2/7] Shortcuts that benefit from generic builtins

---
 libcxx/include/__bit/countl.h   | 4 ++++
 libcxx/include/__bit/countr.h   | 4 ++++
 libcxx/include/__bit/popcount.h | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index bd87f8903787ce..af0409950837b8 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -74,6 +74,9 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__t) - (numeric_limits<unsigned>::digits - numeric_limits<_Tp>::digits);
+#else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
            (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
@@ -95,6 +98,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
     }
     return __ret + __iter;
   }
+#endif // __has_builtin(__builtin_clzg)
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index af347688888739..6c2b2a74ee7ac7 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -52,6 +52,9 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
+#if __has_builtin(__builtin_ctz)
+  return __builtin_ctz(__t);
+#else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_ctz(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -67,6 +70,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
     }
     return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
   }
+#endif // __has_builtin(__builtin_ctz)
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index 85ba84a572dd83..63e0aadec0496e 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -51,6 +51,9 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo
 
 template <__libcpp_unsigned_integer _Tp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
+#  if __has_builtin(__builtin_popcount)
+  return __builtin_popcount(__t);
+#  else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_popcount(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -65,6 +68,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex
     }
     return __ret;
   }
+#  endif // __has_builtin(__builtin_popcount)
 }
 
 #endif // _LIBCPP_STD_VER >= 20

>From 227d89e3f0dd9739a5b367311214c25aefeeadaa Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Tue, 26 Mar 2024 00:42:27 +0100
Subject: [PATCH 3/7] Simplify

---
 libcxx/include/__bit/countl.h   | 17 ++++++++++++++---
 libcxx/include/__bit/countr.h   | 16 +++++++++++++---
 libcxx/include/__bit/popcount.h | 15 +++++++++++----
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index af0409950837b8..a1998ba5f27b4d 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -68,15 +68,25 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
 }
 #endif // _LIBCPP_HAS_NO_INT128
 
+#if __has_builtin(__builtin_clzg)
+
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
   static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
-#if __has_builtin(__builtin_clzg)
   return __builtin_clzg(__t) - (numeric_limits<unsigned>::digits - numeric_limits<_Tp>::digits);
-#else
+}
+
+#else // __has_builtin(__builtin_clzg)
+
+template <class _Tp>
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
+  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
+  if (__t == 0)
+    return numeric_limits<_Tp>::digits;
+
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
            (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
@@ -98,9 +108,10 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
     }
     return __ret + __iter;
   }
-#endif // __has_builtin(__builtin_clzg)
 }
 
+#endif // __has_builtin(__builtin_clzg)
+
 #if _LIBCPP_STD_VER >= 20
 
 template <__libcpp_unsigned_integer _Tp>
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 6c2b2a74ee7ac7..b2feaf7d97bce2 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -47,14 +47,23 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct
 #endif
 }
 
+#if __has_builtin(__builtin_ctzg)
+
 template <class _Tp>
 _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
-#if __has_builtin(__builtin_ctz)
   return __builtin_ctz(__t);
-#else
+}
+
+#else // __has_builtin(__builtin_ctzg)
+
+template <class _Tp>
+_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
+  if (__t == 0)
+    return numeric_limits<_Tp>::digits;
+
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_ctz(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -70,9 +79,10 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
     }
     return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
   }
-#endif // __has_builtin(__builtin_ctz)
 }
 
+#endif // __has_builtin(__builtin_ctzg)
+
 #if _LIBCPP_STD_VER >= 20
 
 template <__libcpp_unsigned_integer _Tp>
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index 63e0aadec0496e..68a723bcce63e2 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -49,11 +49,17 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo
 
 #if _LIBCPP_STD_VER >= 20
 
+#  if __has_builtin(__builtin_popcountg)
+
+template <__libcpp_unsigned_integer _Tp>
+_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
+  return __builtin_popcountg(__t);
+}
+
+#  else // __has_builtin(__builtin_popcountg)
+
 template <__libcpp_unsigned_integer _Tp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
-#  if __has_builtin(__builtin_popcount)
-  return __builtin_popcount(__t);
-#  else
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_popcount(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -68,9 +74,10 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex
     }
     return __ret;
   }
-#  endif // __has_builtin(__builtin_popcount)
 }
 
+#  endif // __has_builtin(__builtin_popcountg)
+
 #endif // _LIBCPP_STD_VER >= 20
 
 _LIBCPP_END_NAMESPACE_STD

>From 41eb681ac0b69306c173f9686ce47b7366274e36 Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Tue, 26 Mar 2024 22:54:17 +0100
Subject: [PATCH 4/7] Do cleanup and remove non-feasible usages

---
 libcxx/include/__bit/countl.h           | 23 +++++++----------------
 libcxx/include/__bit/countr.h           | 15 +++------------
 libcxx/include/__bit/popcount.h         | 15 +++------------
 libcxx/src/include/ryu/d2s_intrinsics.h |  2 +-
 libcxx/src/include/ryu/ryu.h            |  4 ++--
 libcxx/src/ryu/f2s.cpp                  |  2 +-
 6 files changed, 17 insertions(+), 44 deletions(-)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index a1998ba5f27b4d..21bdde9bbe4c8d 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+// ToDo: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
+//  refactor this code to exclusively use __builtin_clzg.
+
 #ifndef _LIBCPP___BIT_COUNTL_H
 #define _LIBCPP___BIT_COUNTL_H
 
@@ -25,31 +28,22 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
-#if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#else
   return __builtin_clz(__x);
-#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#else
   return __builtin_clzl(__x);
-#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#else
   return __builtin_clzll(__x);
-#endif
 }
 
 #ifndef _LIBCPP_HAS_NO_INT128
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT {
+#  if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__x);
+#  else
   // The function is written in this form due to C++ constexpr limitations.
   // The algorithm:
   // - Test whether any bit in the high 64-bits is set
@@ -59,9 +53,6 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
   // - Any bits set:
   //   - The number of leading zeros of the input is the number of leading
   //     zeros in the high 64-bits.
-#  if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__x);
-#  else
   return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
                             : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
 #  endif
@@ -76,7 +67,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
-  return __builtin_clzg(__t) - (numeric_limits<unsigned>::digits - numeric_limits<_Tp>::digits);
+  return __builtin_clzg(__t);
 }
 
 #else // __has_builtin(__builtin_clzg)
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index b2feaf7d97bce2..a01ec96417f08d 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+// ToDo: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
+//  refactor this code to exclusively use __builtin_ctzg.
+
 #ifndef _LIBCPP___BIT_COUNTR_H
 #define _LIBCPP___BIT_COUNTR_H
 
@@ -24,27 +27,15 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
-#if __has_builtin(__builtin_ctzg)
-  return __builtin_ctzg(__x);
-#else
   return __builtin_ctz(__x);
-#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_ctzg)
-  return __builtin_ctzg(__x);
-#else
   return __builtin_ctzl(__x);
-#endif
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_ctzg)
-  return __builtin_ctzg(__x);
-#else
   return __builtin_ctzll(__x);
-#endif
 }
 
 #if __has_builtin(__builtin_ctzg)
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index 68a723bcce63e2..88c967d67c60a8 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+// ToDo: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
+//  refactor this code to exclusively use __builtin_popcountg.
+
 #ifndef _LIBCPP___BIT_POPCOUNT_H
 #define _LIBCPP___BIT_POPCOUNT_H
 
@@ -24,27 +27,15 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
-#if __has_builtin(__builtin_popcountg)
-  return __builtin_popcountg(__x);
-#else
   return __builtin_popcount(__x);
-#endif
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_popcountg)
-  return __builtin_popcountg(__x);
-#else
   return __builtin_popcountl(__x);
-#endif
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
-#if __has_builtin(__builtin_popcountg)
-  return __builtin_popcountg(__x);
-#else
   return __builtin_popcountll(__x);
-#endif
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h
index afe64649a0be1c..be50361fb3b334 100644
--- a/libcxx/src/include/ryu/d2s_intrinsics.h
+++ b/libcxx/src/include/ryu/d2s_intrinsics.h
@@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
   _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
-  // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here.
+  // __builtin_ctzll doesn't appear to be faster here.
   return (__value & ((1ull << __p) - 1)) == 0;
 }
 
diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h
index 85831bed61b21c..de1744ba20a0e2 100644
--- a/libcxx/src/include/ryu/ryu.h
+++ b/libcxx/src/include/ryu/ryu.h
@@ -73,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in
   if (__mask == 0) {
     return false;
   }
-  *__index = __libcpp_ctz(__mask);
+  *__index = __builtin_ctzll(__mask);
   return true;
 }
 
@@ -81,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde
   if (__mask == 0) {
     return false;
   }
-  *__index = __libcpp_ctz(__mask);
+  *__index = __builtin_ctz(__mask);
   return true;
 }
 #endif  // !_MSC_VER
diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp
index e7b5d39669f990..f42fbd68c91d2d 100644
--- a/libcxx/src/ryu/f2s.cpp
+++ b/libcxx/src/ryu/f2s.cpp
@@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
   _LIBCPP_ASSERT_INTERNAL(__p < 32, "");
-  // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here.
+  // __builtin_ctz doesn't appear to be faster here.
   return (__value & ((1u << __p) - 1)) == 0;
 }
 

>From 6d969ba90b9829413c675249a3e187f60625fc15 Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Wed, 27 Mar 2024 02:17:21 +0100
Subject: [PATCH 5/7] Fix typo

---
 libcxx/include/__bit/countl.h   | 2 +-
 libcxx/include/__bit/countr.h   | 4 ++--
 libcxx/include/__bit/popcount.h | 2 +-
 libcxx/src/include/ryu/ryu.h    | 1 -
 4 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index 21bdde9bbe4c8d..5e71deb43fae23 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-// ToDo: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
+// TODO: __builtin_clzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
 //  refactor this code to exclusively use __builtin_clzg.
 
 #ifndef _LIBCPP___BIT_COUNTL_H
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index a01ec96417f08d..8d345d72d17278 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-// ToDo: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
+// TODO: __builtin_ctzg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
 //  refactor this code to exclusively use __builtin_ctzg.
 
 #ifndef _LIBCPP___BIT_COUNTR_H
@@ -45,7 +45,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
-  return __builtin_ctz(__t);
+  return __builtin_ctzg(__t);
 }
 
 #else // __has_builtin(__builtin_ctzg)
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index 88c967d67c60a8..a07e9e6ffc56e4 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-// ToDo: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
+// TODO: __builtin_popcountg is available since Clang 19 and GCC 14. When support for older versions is dropped, we can
 //  refactor this code to exclusively use __builtin_popcountg.
 
 #ifndef _LIBCPP___BIT_POPCOUNT_H
diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h
index de1744ba20a0e2..7b19ecfec5915a 100644
--- a/libcxx/src/include/ryu/ryu.h
+++ b/libcxx/src/include/ryu/ryu.h
@@ -43,7 +43,6 @@
 // Avoid formatting to keep the changes with the original code minimal.
 // clang-format off
 
-#include <__bit/countr.h>
 #include <__charconv/chars_format.h>
 #include <__charconv/to_chars_result.h>
 #include <__config>

>From bf43981f233b146cb7b259b7af006a951ec912ca Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Thu, 28 Mar 2024 17:39:31 +0100
Subject: [PATCH 6/7] Avoid duplicate signatures

---
 libcxx/include/__bit/countl.h   | 17 +++--------------
 libcxx/include/__bit/countr.h   | 16 +++-------------
 libcxx/include/__bit/popcount.h | 13 +++----------
 3 files changed, 9 insertions(+), 37 deletions(-)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index 5e71deb43fae23..e64e435f80a0b9 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -59,25 +59,15 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
 }
 #endif // _LIBCPP_HAS_NO_INT128
 
-#if __has_builtin(__builtin_clzg)
-
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
   static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
+#if __has_builtin(__builtin_clzg)
   return __builtin_clzg(__t);
-}
-
-#else // __has_builtin(__builtin_clzg)
-
-template <class _Tp>
-_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
-  static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
-  if (__t == 0)
-    return numeric_limits<_Tp>::digits;
-
+#else  // __has_builtin(__builtin_clzg)
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
            (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
@@ -99,9 +89,8 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
     }
     return __ret + __iter;
   }
-}
-
 #endif // __has_builtin(__builtin_clzg)
+}
 
 #if _LIBCPP_STD_VER >= 20
 
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 8d345d72d17278..5907f6fc9c7458 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -38,23 +38,14 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct
   return __builtin_ctzll(__x);
 }
 
-#if __has_builtin(__builtin_ctzg)
-
 template <class _Tp>
 _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
+#if __has_builtin(__builtin_ctzg)
   return __builtin_ctzg(__t);
-}
-
-#else // __has_builtin(__builtin_ctzg)
-
-template <class _Tp>
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
-  if (__t == 0)
-    return numeric_limits<_Tp>::digits;
-
+#else  // __has_builtin(__builtin_ctzg)
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_ctz(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -70,9 +61,8 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
     }
     return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
   }
-}
-
 #endif // __has_builtin(__builtin_ctzg)
+}
 
 #if _LIBCPP_STD_VER >= 20
 
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index a07e9e6ffc56e4..37b3a3e1f3f2b9 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -40,17 +40,11 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo
 
 #if _LIBCPP_STD_VER >= 20
 
-#  if __has_builtin(__builtin_popcountg)
-
 template <__libcpp_unsigned_integer _Tp>
 _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
+#  if __has_builtin(__builtin_popcountg)
   return __builtin_popcountg(__t);
-}
-
-#  else // __has_builtin(__builtin_popcountg)
-
-template <__libcpp_unsigned_integer _Tp>
-_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
+#  else  // __has_builtin(__builtin_popcountg)
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_popcount(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -65,9 +59,8 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex
     }
     return __ret;
   }
-}
-
 #  endif // __has_builtin(__builtin_popcountg)
+}
 
 #endif // _LIBCPP_STD_VER >= 20
 

>From d3c2847a1f3f1289cbe556ba266f4963547ea9f9 Mon Sep 17 00:00:00 2001
From: Marc Auberer <contact at marc-auberer.com>
Date: Fri, 29 Mar 2024 18:43:57 +0100
Subject: [PATCH 7/7] Use optional second parameter of c{l|t}z builtins

Co-authored-by: Nick Desaulniers <nickdesaulniers at users.noreply.github.com>
---
 libcxx/include/__bit/countl.h | 6 +++---
 libcxx/include/__bit/countr.h | 7 +++----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index e64e435f80a0b9..13df8d4e66c402 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -62,12 +62,12 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
 template <class _Tp>
 _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT {
   static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__countl_zero requires an unsigned integer type");
+#if __has_builtin(__builtin_clzg)
+  return __builtin_clzg(__t, numeric_limits<_Tp>::digits);
+#else  // __has_builtin(__builtin_clzg)
   if (__t == 0)
     return numeric_limits<_Tp>::digits;
 
-#if __has_builtin(__builtin_clzg)
-  return __builtin_clzg(__t);
-#else  // __has_builtin(__builtin_clzg)
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
            (numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index 5907f6fc9c7458..724a0bc23801c4 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -40,12 +40,11 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ct
 
 template <class _Tp>
 _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT {
-  if (__t == 0)
-    return numeric_limits<_Tp>::digits;
-
 #if __has_builtin(__builtin_ctzg)
-  return __builtin_ctzg(__t);
+  return __builtin_ctzg(__t, numeric_limits<_Tp>::digits);
 #else  // __has_builtin(__builtin_ctzg)
+  if (__t == 0)
+    return numeric_limits<_Tp>::digits;
   if (sizeof(_Tp) <= sizeof(unsigned int))
     return std::__libcpp_ctz(static_cast<unsigned int>(__t));
   else if (sizeof(_Tp) <= sizeof(unsigned long))



More information about the libcxx-commits mailing list