[libc-commits] [libc] d92e541 - [libc] Use __builtin_clz to find leading 1 in hypot

Siva Chandra Reddy via libc-commits libc-commits at lists.llvm.org
Thu Jan 20 15:32:54 PST 2022


Author: Clint Caywood
Date: 2022-01-20T23:32:34Z
New Revision: d92e5412ea571158b5b524855d19b5eafa0567ce

URL: https://github.com/llvm/llvm-project/commit/d92e5412ea571158b5b524855d19b5eafa0567ce
DIFF: https://github.com/llvm/llvm-project/commit/d92e5412ea571158b5b524855d19b5eafa0567ce.diff

LOG: [libc] Use __builtin_clz to find leading 1 in hypot

This is an optimization that uses a single CPU instruction on supported
architectures (amd64 and aarch64, but possibly others) to replace what was
previously an iterative look-up-table algorithm.

Originally I suggested using inline assembly for this in
https://reviews.llvm.org/D117584.

Reviewed By: lntue, sivachandra

Differential Revision: https://reviews.llvm.org/D117684

Added: 
    

Modified: 
    libc/src/__support/FPUtil/Hypot.h

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h
index 15b26798ccb5..bb658b0085fe 100644
--- a/libc/src/__support/FPUtil/Hypot.h
+++ b/libc/src/__support/FPUtil/Hypot.h
@@ -22,33 +22,39 @@ namespace internal {
 template <typename T>
 static inline T find_leading_one(T mant, int &shift_length);
 
+// The following overloads are matched based on what is accepted by
+// __builtin_clz* rather than using the exactly-sized aliases from stdint.h
+// (such as uint32_t). There are 3 overloads even though only 2 will ever be
+// used on any given platform, since unsigned long varies in size depending on
+// the word size of the architecture.
+
 template <>
-inline uint32_t find_leading_one<uint32_t>(uint32_t mant, int &shift_length) {
+inline unsigned int find_leading_one<unsigned int>(unsigned int mant,
+                                                   int &shift_length) {
   shift_length = 0;
-  constexpr int NSTEPS = 5;
-  constexpr uint32_t BOUNDS[NSTEPS] = {1 << 16, 1 << 8, 1 << 4, 1 << 2, 1 << 1};
-  constexpr int SHIFTS[NSTEPS] = {16, 8, 4, 2, 1};
-  for (int i = 0; i < NSTEPS; ++i) {
-    if (mant >= BOUNDS[i]) {
-      shift_length += SHIFTS[i];
-      mant >>= SHIFTS[i];
-    }
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clz(mant);
   }
   return 1U << shift_length;
 }
 
 template <>
-inline uint64_t find_leading_one<uint64_t>(uint64_t mant, int &shift_length) {
+inline unsigned long find_leading_one<unsigned long>(unsigned long mant,
+                                                     int &shift_length) {
   shift_length = 0;
-  constexpr int NSTEPS = 6;
-  constexpr uint64_t BOUNDS[NSTEPS] = {1ULL << 32, 1ULL << 16, 1ULL << 8,
-                                       1ULL << 4,  1ULL << 2,  1ULL << 1};
-  constexpr int SHIFTS[NSTEPS] = {32, 16, 8, 4, 2, 1};
-  for (int i = 0; i < NSTEPS; ++i) {
-    if (mant >= BOUNDS[i]) {
-      shift_length += SHIFTS[i];
-      mant >>= SHIFTS[i];
-    }
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clzl(mant);
+  }
+  return 1UL << shift_length;
+}
+
+template <>
+inline unsigned long long
+find_leading_one<unsigned long long>(unsigned long long mant,
+                                     int &shift_length) {
+  shift_length = 0;
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clzll(mant);
   }
   return 1ULL << shift_length;
 }


        


More information about the libc-commits mailing list