[libcxx-commits] [libcxx] [libcxx] Use generic builtins for popcount, clz and ctz (PR #86563)

Marc Auberer via libcxx-commits libcxx-commits at lists.llvm.org
Mon Mar 25 12:23:15 PDT 2024


https://github.com/marcauberer created https://github.com/llvm/llvm-project/pull/86563

Use `__builtin_popcountg` instead of `__builtin_popcount{l|ll}`
Use `__builtin_clzg` instead of `__builtin_clz{l|ll}`
Use `__builtin_ctzg` instead of `__builtin_ctz{l|ll}`

cc @nickdesaulniers 

>From bb2b3775f92bf2399112c0860377ea67a0359cb7 Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Mon, 25 Mar 2024 19:53:01 +0100
Subject: [PATCH] [libcxx] Use generic builtins for popcount, clz and ctz

Use __builtin_popcountg instead of __builtin_popcount{l|ll}
Use __builtin_clzg instead of __builtin_clz{l|ll}
Use __builtin_ctzg instead of __builtin_ctz{l|ll}
---
 libcxx/include/__bit/countl.h           | 10 +++++-----
 libcxx/include/__bit/countr.h           |  6 +++---
 libcxx/include/__bit/popcount.h         |  6 +++---
 libcxx/src/include/ryu/d2s_intrinsics.h |  2 +-
 libcxx/src/include/ryu/ryu.h            |  4 ++--
 libcxx/src/ryu/f2s.cpp                  |  2 +-
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index 396cfc2c3f4064..28d1e03b4a381e 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -25,15 +25,15 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
-  return __builtin_clz(__x);
+  return __builtin_clzg(__x);
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
-  return __builtin_clzl(__x);
+  return __builtin_clzg(__x);
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
-  return __builtin_clzll(__x);
+  return __builtin_clzg(__x);
 }
 
 #ifndef _LIBCPP_HAS_NO_INT128
@@ -47,8 +47,8 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
   // - Any bits set:
   //   - The number of leading zeros of the input is the number of leading
   //     zeros in the high 64-bits.
-  return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
-                            : __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
+  return ((__x >> 64) == 0) ? (64 + __builtin_clzg(static_cast<unsigned long long>(__x)))
+                            : __builtin_clzg(static_cast<unsigned long long>(__x >> 64));
 }
 #endif // _LIBCPP_HAS_NO_INT128
 
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index b6b3ac52ca4e47..dc05a88a6153d5 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -24,15 +24,15 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
-  return __builtin_ctz(__x);
+  return __builtin_ctzg(__x);
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
-  return __builtin_ctzl(__x);
+  return __builtin_ctzg(__x);
 }
 
 _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
-  return __builtin_ctzll(__x);
+  return __builtin_ctzg(__x);
 }
 
 template <class _Tp>
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index b0319cef251894..42026554335261 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -24,15 +24,15 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
-  return __builtin_popcount(__x);
+  return __builtin_popcountg(__x);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
-  return __builtin_popcountl(__x);
+  return __builtin_popcountg(__x);
 }
 
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
-  return __builtin_popcountll(__x);
+  return __builtin_popcountg(__x);
 }
 
 #if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h
index be50361fb3b334..afe64649a0be1c 100644
--- a/libcxx/src/include/ryu/d2s_intrinsics.h
+++ b/libcxx/src/include/ryu/d2s_intrinsics.h
@@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
   _LIBCPP_ASSERT_INTERNAL(__p < 64, "");
-  // __builtin_ctzll doesn't appear to be faster here.
+  // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here.
   return (__value & ((1ull << __p) - 1)) == 0;
 }
 
diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h
index 7b19ecfec5915a..67fb0392f1e205 100644
--- a/libcxx/src/include/ryu/ryu.h
+++ b/libcxx/src/include/ryu/ryu.h
@@ -72,7 +72,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in
   if (__mask == 0) {
     return false;
   }
-  *__index = __builtin_ctzll(__mask);
+  *__index = __builtin_ctzg(__mask);
   return true;
 }
 
@@ -80,7 +80,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde
   if (__mask == 0) {
     return false;
   }
-  *__index = __builtin_ctz(__mask);
+  *__index = __builtin_ctzg(__mask);
   return true;
 }
 #endif  // !_MSC_VER
diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp
index f42fbd68c91d2d..e7b5d39669f990 100644
--- a/libcxx/src/ryu/f2s.cpp
+++ b/libcxx/src/ryu/f2s.cpp
@@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
   _LIBCPP_ASSERT_INTERNAL(__value != 0, "");
   _LIBCPP_ASSERT_INTERNAL(__p < 32, "");
-  // __builtin_ctz doesn't appear to be faster here.
+  // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here.
   return (__value & ((1u << __p) - 1)) == 0;
 }
 



More information about the libcxx-commits mailing list