[libcxx-commits] [libcxx] [libcxx] Use generic builtins for popcount, clz and ctz (PR #86563)
Marc Auberer via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Mar 25 14:30:35 PDT 2024
https://github.com/marcauberer updated https://github.com/llvm/llvm-project/pull/86563
>From 9d22aef5c56f36de60248b37933fd6e674f28f9c Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Mon, 25 Mar 2024 19:53:01 +0100
Subject: [PATCH 1/2] [libcxx] Use generic builtins for popcount, clz and ctz
Use __builtin_popcountg instead of __buildin_popcount{l|ll}
Use __builtin_clzg instead of __buildin_clz{l|ll}
Use __builtin_ctzg instead of __builtin_ctz{l|ll}
---
libcxx/include/__bit/countl.h | 16 ++++++++++++++++
libcxx/include/__bit/countr.h | 12 ++++++++++++
libcxx/include/__bit/popcount.h | 12 ++++++++++++
libcxx/src/include/ryu/d2s_intrinsics.h | 2 +-
libcxx/src/include/ryu/ryu.h | 5 +++--
libcxx/src/ryu/f2s.cpp | 2 +-
6 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index 396cfc2c3f4064..bd87f8903787ce 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -25,15 +25,27 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+ return __builtin_clzg(__x);
+#else
return __builtin_clz(__x);
+#endif
}
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+ return __builtin_clzg(__x);
+#else
return __builtin_clzl(__x);
+#endif
}
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_clzg)
+ return __builtin_clzg(__x);
+#else
return __builtin_clzll(__x);
+#endif
}
#ifndef _LIBCPP_HAS_NO_INT128
@@ -47,8 +59,12 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x)
// - Any bits set:
// - The number of leading zeros of the input is the number of leading
// zeros in the high 64-bits.
+# if __has_builtin(__builtin_clzg)
+ return __builtin_clzg(__x);
+# else
return ((__x >> 64) == 0) ? (64 + __builtin_clzll(static_cast<unsigned long long>(__x)))
: __builtin_clzll(static_cast<unsigned long long>(__x >> 64));
+# endif
}
#endif // _LIBCPP_HAS_NO_INT128
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index b6b3ac52ca4e47..af347688888739 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+ return __builtin_ctzg(__x);
+#else
return __builtin_ctz(__x);
+#endif
}
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+ return __builtin_ctzg(__x);
+#else
return __builtin_ctzl(__x);
+#endif
}
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_ctzg)
+ return __builtin_ctzg(__x);
+#else
return __builtin_ctzll(__x);
+#endif
}
template <class _Tp>
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index b0319cef251894..85ba84a572dd83 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -24,15 +24,27 @@ _LIBCPP_PUSH_MACROS
_LIBCPP_BEGIN_NAMESPACE_STD
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+ return __builtin_popcountg(__x);
+#else
return __builtin_popcount(__x);
+#endif
}
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+ return __builtin_popcountg(__x);
+#else
return __builtin_popcountl(__x);
+#endif
}
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned long long __x) _NOEXCEPT {
+#if __has_builtin(__builtin_popcountg)
+ return __builtin_popcountg(__x);
+#else
return __builtin_popcountll(__x);
+#endif
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/src/include/ryu/d2s_intrinsics.h b/libcxx/src/include/ryu/d2s_intrinsics.h
index be50361fb3b334..afe64649a0be1c 100644
--- a/libcxx/src/include/ryu/d2s_intrinsics.h
+++ b/libcxx/src/include/ryu/d2s_intrinsics.h
@@ -249,7 +249,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint64_t __value, const uint32_t __p) {
_LIBCPP_ASSERT_INTERNAL(__value != 0, "");
_LIBCPP_ASSERT_INTERNAL(__p < 64, "");
- // __builtin_ctzll doesn't appear to be faster here.
+ // __builtin_ctzll/__builtin_ctzg doesn't appear to be faster here.
return (__value & ((1ull << __p) - 1)) == 0;
}
diff --git a/libcxx/src/include/ryu/ryu.h b/libcxx/src/include/ryu/ryu.h
index 7b19ecfec5915a..85831bed61b21c 100644
--- a/libcxx/src/include/ryu/ryu.h
+++ b/libcxx/src/include/ryu/ryu.h
@@ -43,6 +43,7 @@
// Avoid formatting to keep the changes with the original code minimal.
// clang-format off
+#include <__bit/countr.h>
#include <__charconv/chars_format.h>
#include <__charconv/to_chars_result.h>
#include <__config>
@@ -72,7 +73,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward64(unsigned long* __in
if (__mask == 0) {
return false;
}
- *__index = __builtin_ctzll(__mask);
+ *__index = __libcpp_ctz(__mask);
return true;
}
@@ -80,7 +81,7 @@ _LIBCPP_HIDE_FROM_ABI inline unsigned char _BitScanForward(unsigned long* __inde
if (__mask == 0) {
return false;
}
- *__index = __builtin_ctz(__mask);
+ *__index = __libcpp_ctz(__mask);
return true;
}
#endif // !_MSC_VER
diff --git a/libcxx/src/ryu/f2s.cpp b/libcxx/src/ryu/f2s.cpp
index f42fbd68c91d2d..e7b5d39669f990 100644
--- a/libcxx/src/ryu/f2s.cpp
+++ b/libcxx/src/ryu/f2s.cpp
@@ -107,7 +107,7 @@ inline constexpr uint64_t __FLOAT_POW5_SPLIT[47] = {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI inline bool __multipleOfPowerOf2(const uint32_t __value, const uint32_t __p) {
_LIBCPP_ASSERT_INTERNAL(__value != 0, "");
_LIBCPP_ASSERT_INTERNAL(__p < 32, "");
- // __builtin_ctz doesn't appear to be faster here.
+ // __builtin_ctz/__builtin_ctzg doesn't appear to be faster here.
return (__value & ((1u << __p) - 1)) == 0;
}
>From fc3821cf371dbf6d6ba1ca27d41aeeaa93d68f3b Mon Sep 17 00:00:00 2001
From: Marc Auberer <marc.auberer at chillibits.com>
Date: Mon, 25 Mar 2024 22:30:22 +0100
Subject: [PATCH 2/2] Shortcuts that benefit from generic builtins
---
libcxx/include/__bit/countl.h | 4 ++++
libcxx/include/__bit/countr.h | 4 ++++
libcxx/include/__bit/popcount.h | 4 ++++
3 files changed, 12 insertions(+)
diff --git a/libcxx/include/__bit/countl.h b/libcxx/include/__bit/countl.h
index bd87f8903787ce..af0409950837b8 100644
--- a/libcxx/include/__bit/countl.h
+++ b/libcxx/include/__bit/countl.h
@@ -74,6 +74,9 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
if (__t == 0)
return numeric_limits<_Tp>::digits;
+#if __has_builtin(__builtin_clzg)
+ return __builtin_clzg(__t) - (numeric_limits<unsigned>::digits - numeric_limits<_Tp>::digits);
+#else
if (sizeof(_Tp) <= sizeof(unsigned int))
return std::__libcpp_clz(static_cast<unsigned int>(__t)) -
(numeric_limits<unsigned int>::digits - numeric_limits<_Tp>::digits);
@@ -95,6 +98,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _
}
return __ret + __iter;
}
+#endif // __has_builtin(__builtin_clzg)
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/countr.h b/libcxx/include/__bit/countr.h
index af347688888739..6c2b2a74ee7ac7 100644
--- a/libcxx/include/__bit/countr.h
+++ b/libcxx/include/__bit/countr.h
@@ -52,6 +52,9 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
if (__t == 0)
return numeric_limits<_Tp>::digits;
+#if __has_builtin(__builtin_ctz)
+ return __builtin_ctz(__t);
+#else
if (sizeof(_Tp) <= sizeof(unsigned int))
return std::__libcpp_ctz(static_cast<unsigned int>(__t));
else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -67,6 +70,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __coun
}
return __ret + std::__libcpp_ctz(static_cast<unsigned long long>(__t));
}
+#endif // __has_builtin(__builtin_ctz)
}
#if _LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__bit/popcount.h b/libcxx/include/__bit/popcount.h
index 85ba84a572dd83..c5a68749516dbb 100644
--- a/libcxx/include/__bit/popcount.h
+++ b/libcxx/include/__bit/popcount.h
@@ -51,6 +51,9 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_popcount(unsigned lo
template <__libcpp_unsigned_integer _Tp>
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noexcept {
+#if __has_builtin(__builtin_popcount)
+ return __builtin_popcount(__t);
+#else
if (sizeof(_Tp) <= sizeof(unsigned int))
return std::__libcpp_popcount(static_cast<unsigned int>(__t));
else if (sizeof(_Tp) <= sizeof(unsigned long))
@@ -65,6 +68,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr int popcount(_Tp __t) noex
}
return __ret;
}
+#endif // __has_builtin(__builtin_popcount)
}
#endif // _LIBCPP_STD_VER >= 20
More information about the libcxx-commits
mailing list