[libc-commits] [PATCH] D120579: Use __builtin_clz to find leading 1 in generic sqrt (where possible)
Clint Caywood via Phabricator via libc-commits
libc-commits at lists.llvm.org
Fri Feb 25 11:17:59 PST 2022
cratonica updated this revision to Diff 411471.
cratonica added a comment.
Attempting to fix patch
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D120579/new/
https://reviews.llvm.org/D120579
Files:
libc/src/__support/FPUtil/generic/sqrt.h
Index: libc/src/__support/FPUtil/generic/sqrt.h
===================================================================
--- libc/src/__support/FPUtil/generic/sqrt.h
+++ libc/src/__support/FPUtil/generic/sqrt.h
@@ -31,47 +31,28 @@
};
#endif // SPECIAL_X86_LONG_DOUBLE
-template <typename T>
-static inline void normalize(int &exponent,
- typename FPBits<T>::UIntType &mantissa);
-
-template <> inline void normalize<float>(int &exponent, uint32_t &mantissa) {
- // Use binary search to shift the leading 1 bit.
- // With MantissaWidth<float> = 23, it will take
- // ceil(log2(23)) = 5 steps checking the mantissa bits as followed:
- // Step 1: 0000 0000 0000 XXXX XXXX XXXX
- // Step 2: 0000 00XX XXXX XXXX XXXX XXXX
- // Step 3: 000X XXXX XXXX XXXX XXXX XXXX
- // Step 4: 00XX XXXX XXXX XXXX XXXX XXXX
- // Step 5: 0XXX XXXX XXXX XXXX XXXX XXXX
- constexpr int NSTEPS = 5; // = ceil(log2(MantissaWidth))
- constexpr uint32_t BOUNDS[NSTEPS] = {1 << 12, 1 << 18, 1 << 21, 1 << 22,
- 1 << 23};
- constexpr int SHIFTS[NSTEPS] = {12, 6, 3, 2, 1};
-
- for (int i = 0; i < NSTEPS; ++i) {
- if (mantissa < BOUNDS[i]) {
- exponent -= SHIFTS[i];
- mantissa <<= SHIFTS[i];
- }
- }
+// The following overloads are matched based on what is accepted by
+// __builtin_clz* rather than using the exactly-sized aliases from stdint.h.
+// This way, we can avoid making any assumptions about integer sizes and let the
+// compiler match for us.
+template <typename T> static inline int clz(T val);
+template <> inline int clz<unsigned int>(unsigned int val) {
+ return __builtin_clz(val);
+}
+template <> inline int clz<unsigned long int>(unsigned long int val) {
+ return __builtin_clzl(val);
+}
+template <> inline int clz<unsigned long long int>(unsigned long long int val) {
+ return __builtin_clzll(val);
}
-template <> inline void normalize<double>(int &exponent, uint64_t &mantissa) {
- // Use binary search to shift the leading 1 bit similar to float.
- // With MantissaWidth<double> = 52, it will take
- // ceil(log2(52)) = 6 steps checking the mantissa bits.
- constexpr int NSTEPS = 6; // = ceil(log2(MantissaWidth))
- constexpr uint64_t BOUNDS[NSTEPS] = {1ULL << 26, 1ULL << 39, 1ULL << 46,
- 1ULL << 49, 1ULL << 51, 1ULL << 52};
- constexpr int SHIFTS[NSTEPS] = {27, 14, 7, 4, 2, 1};
-
- for (int i = 0; i < NSTEPS; ++i) {
- if (mantissa < BOUNDS[i]) {
- exponent -= SHIFTS[i];
- mantissa <<= SHIFTS[i];
- }
- }
+template <typename T>
+static inline void normalize(int &exponent,
+ typename FPBits<T>::UIntType &mantissa) {
+ const int shift =
+ clz(mantissa) - (8 * sizeof(mantissa) - 1 - MantissaWidth<T>::VALUE);
+ exponent -= shift;
+ mantissa <<= shift;
}
#ifdef LONG_DOUBLE_IS_DOUBLE
@@ -82,9 +63,9 @@
#elif !defined(SPECIAL_X86_LONG_DOUBLE)
template <>
inline void normalize<long double>(int &exponent, __uint128_t &mantissa) {
- // Use binary search to shift the leading 1 bit similar to float.
- // With MantissaWidth<long double> = 112, it will take
- // ceil(log2(112)) = 7 steps checking the mantissa bits.
+ // There is no __builtin_clz for 128-bit integers, so use binary search to
+ // shift the leading 1 bit. With MantissaWidth<long double> = 112, it will
+ // take ceil(log2(112)) = 7 steps checking the mantissa bits.
constexpr int NSTEPS = 7; // = ceil(log2(MantissaWidth))
constexpr __uint128_t BOUNDS[NSTEPS] = {
__uint128_t(1) << 56, __uint128_t(1) << 84, __uint128_t(1) << 98,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D120579.411471.patch
Type: text/x-patch
Size: 3622 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libc-commits/attachments/20220225/e0280be9/attachment.bin>
More information about the libc-commits
mailing list