[llvm] 0c4f53f - Reland: Drop the ZeroBehavior parameter from countLeadingZeros and the like (NFC)

Fri Jan 20 18:42:10 PST 2023

Author: Kazu Hirata
Date: 2023-01-20T18:42:03-08:00
New Revision: 0c4f53fcff794d8ad4b1b43443affc37c2061f55

URL: https://github.com/llvm/llvm-project/commit/0c4f53fcff794d8ad4b1b43443affc37c2061f55
DIFF: https://github.com/llvm/llvm-project/commit/0c4f53fcff794d8ad4b1b43443affc37c2061f55.diff

LOG: Reland: Drop the ZeroBehavior parameter from countLeadingZeros and the like (NFC)

This patch drops the ZeroBehavior parameter from bit counting
functions like countLeadingZeros.  ZeroBehavior specifies the behavior
when the input to count{Leading,Trailing}Zeros is zero and when the
input to count{Leading,Trailing}Ones is all ones.

ZeroBehavior was first introduced on May 24, 2013 in commit
eb91eac9fb866ab1243366d2e238b9961895612d.  While that patch did not
state the intention, I would guess ZeroBehavior was for performance
reasons.  The x86 machines around that time required a conditional
branch to implement countLeadingZero<uint32_t> that returns the 32 on
zero:

        test    edi, edi
        je      .LBB0_2
        bsr     eax, edi
        xor     eax, 31
.LBB1_2:
        mov     eax, 32

That is, we can remove the conditional branch if we don't care about
the behavior on zero.

IIUC, Intel's Haswell architecture, launched on June 4, 2013,
introduced several bit manipulation instructions, including lzcnt and
tzcnt, which eliminated the need for the conditional branch.

I think it's time to retire ZeroBehavior as its utility is very
limited.  If you care about compilation speed, you should build LLVM
with an appropriate -march= to take advantage of lzcnt and tzcnt.
Even if not, modern host compilers should be able to optimize away
quite a few conditional branches because the input is often known to
be nonzero from dominating conditional branches.

In this iteration, I've moved the forward declarations of
_BitScanForward outside the llvm space to fix builds on Windows.

Differential Revision: https://reviews.llvm.org/D141798

Added: 
    

Modified: 
    llvm/include/llvm/ADT/bit.h
    llvm/include/llvm/Support/MathExtras.h
    llvm/unittests/ADT/BitTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h
index 1757aabc80cee..126e57ea6a277 100644

--- a/llvm/include/llvm/ADT/bit.h
+++ b/llvm/include/llvm/ADT/bit.h
@@ -17,8 +17,21 @@
 #include "llvm/Support/Compiler.h"
 #include <cstdint>
 #include <cstring>
+#include <limits>
 #include <type_traits>
 
+#ifdef _MSC_VER
+// Declare these intrinsics manually rather including intrin.h. It's very
+// expensive, and bit.h is popular via MathExtras.h.
+// #include <intrin.h>
+extern "C" {
+unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
+unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
+unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
+unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
+}
+#endif
+
 namespace llvm {
 
 // This implementation of bit_cast is 
diff erent from the C++20 one in two ways:
@@ -41,6 +54,169 @@ constexpr inline bool has_single_bit(T Value) noexcept {
   return (Value != 0) && ((Value & (Value - 1)) == 0);
 }
 
+namespace detail {
+template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
+  static unsigned count(T Val) {
+    if (!Val)
+      return std::numeric_limits<T>::digits;
+    if (Val & 0x1)
+      return 0;
+
+    // Bisection method.
+    unsigned ZeroBits = 0;
+    T Shift = std::numeric_limits<T>::digits >> 1;
+    T Mask = std::numeric_limits<T>::max() >> Shift;
+    while (Shift) {
+      if ((Val & Mask) == 0) {
+        Val >>= Shift;
+        ZeroBits |= Shift;
+      }
+      Shift >>= 1;
+      Mask >>= Shift;
+    }
+    return ZeroBits;
+  }
+};
+
+#if defined(__GNUC__) || defined(_MSC_VER)
+template <typename T> struct TrailingZerosCounter<T, 4> {
+  static unsigned count(T Val) {
+    if (Val == 0)
+      return 32;
+
+#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
+    return __builtin_ctz(Val);
+#elif defined(_MSC_VER)
+    unsigned long Index;
+    _BitScanForward(&Index, Val);
+    return Index;
+#endif
+  }
+};
+
+#if !defined(_MSC_VER) || defined(_M_X64)
+template <typename T> struct TrailingZerosCounter<T, 8> {
+  static unsigned count(T Val) {
+    if (Val == 0)
+      return 64;
+
+#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
+    return __builtin_ctzll(Val);
+#elif defined(_MSC_VER)
+    unsigned long Index;
+    _BitScanForward64(&Index, Val);
+    return Index;
+#endif
+  }
+};
+#endif
+#endif
+} // namespace detail
+
+/// Count number of 0's from the least significant bit to the most
+///   stopping at the first 1.
+///
+/// Only unsigned integral types are allowed.
+///
+/// Returns std::numeric_limits<T>::digits on an input of 0.
+template <typename T> int countr_zero(T Val) {
+  static_assert(std::is_unsigned_v<T>,
+                "Only unsigned integral types are allowed.");
+  return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val);
+}
+
+namespace detail {
+template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
+  static unsigned count(T Val) {
+    if (!Val)
+      return std::numeric_limits<T>::digits;
+
+    // Bisection method.
+    unsigned ZeroBits = 0;
+    for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
+      T Tmp = Val >> Shift;
+      if (Tmp)
+        Val = Tmp;
+      else
+        ZeroBits |= Shift;
+    }
+    return ZeroBits;
+  }
+};
+
+#if defined(__GNUC__) || defined(_MSC_VER)
+template <typename T> struct LeadingZerosCounter<T, 4> {
+  static unsigned count(T Val) {
+    if (Val == 0)
+      return 32;
+
+#if __has_builtin(__builtin_clz) || defined(__GNUC__)
+    return __builtin_clz(Val);
+#elif defined(_MSC_VER)
+    unsigned long Index;
+    _BitScanReverse(&Index, Val);
+    return Index ^ 31;
+#endif
+  }
+};
+
+#if !defined(_MSC_VER) || defined(_M_X64)
+template <typename T> struct LeadingZerosCounter<T, 8> {
+  static unsigned count(T Val) {
+    if (Val == 0)
+      return 64;
+
+#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
+    return __builtin_clzll(Val);
+#elif defined(_MSC_VER)
+    unsigned long Index;
+    _BitScanReverse64(&Index, Val);
+    return Index ^ 63;
+#endif
+  }
+};
+#endif
+#endif
+} // namespace detail
+
+/// Count number of 0's from the most significant bit to the least
+///   stopping at the first 1.
+///
+/// Only unsigned integral types are allowed.
+///
+/// Returns std::numeric_limits<T>::digits on an input of 0.
+template <typename T> int countl_zero(T Val) {
+  static_assert(std::is_unsigned_v<T>,
+                "Only unsigned integral types are allowed.");
+  return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val);
+}
+
+/// Count the number of ones from the most significant bit to the first
+/// zero bit.
+///
+/// Ex. countl_one(0xFF0FFF00) == 8.
+/// Only unsigned integral types are allowed.
+///
+/// Returns std::numeric_limits<T>::digits on an input of all ones.
+template <typename T> int countl_one(T Value) {
+  static_assert(std::is_unsigned_v<T>,
+                "Only unsigned integral types are allowed.");
+  return llvm::countl_zero<T>(~Value);
+}
+
+/// Count the number of ones from the least significant bit to the first
+/// zero bit.
+///
+/// Ex. countr_one(0x00FF00FF) == 8.
+/// Only unsigned integral types are allowed.
+///
+/// Returns std::numeric_limits<T>::digits on an input of all ones.
+template <typename T> int countr_one(T Value) {
+  static_assert(std::is_unsigned_v<T>,
+                "Only unsigned integral types are allowed.");
+  return llvm::countr_zero<T>(~Value);
+}
+
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct PopulationCounter {
   static int count(T Value) {

diff  --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index 45bd38ac9d5be..9777a2b0c05df 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -22,18 +22,6 @@
 #include <limits>
 #include <type_traits>
 
-#ifdef _MSC_VER
-// Declare these intrinsics manually rather including intrin.h. It's very
-// expensive, and MathExtras.h is popular.
-// #include <intrin.h>
-extern "C" {
-unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
-unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
-unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
-unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
-}
-#endif
-
 namespace llvm {
 
 /// The behavior an operation has on an input of 0.
@@ -80,65 +68,6 @@ constexpr float ef          = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A
                 phif        = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
 } // namespace numbers
 
-namespace detail {
-template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
-  static unsigned count(T Val) {
-    if (!Val)
-      return std::numeric_limits<T>::digits;
-    if (Val & 0x1)
-      return 0;
-
-    // Bisection method.
-    unsigned ZeroBits = 0;
-    T Shift = std::numeric_limits<T>::digits >> 1;
-    T Mask = std::numeric_limits<T>::max() >> Shift;
-    while (Shift) {
-      if ((Val & Mask) == 0) {
-        Val >>= Shift;
-        ZeroBits |= Shift;
-      }
-      Shift >>= 1;
-      Mask >>= Shift;
-    }
-    return ZeroBits;
-  }
-};
-
-#if defined(__GNUC__) || defined(_MSC_VER)
-template <typename T> struct TrailingZerosCounter<T, 4> {
-  static unsigned count(T Val) {
-    if (Val == 0)
-      return 32;
-
-#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
-    return __builtin_ctz(Val);
-#elif defined(_MSC_VER)
-    unsigned long Index;
-    _BitScanForward(&Index, Val);
-    return Index;
-#endif
-  }
-};
-
-#if !defined(_MSC_VER) || defined(_M_X64)
-template <typename T> struct TrailingZerosCounter<T, 8> {
-  static unsigned count(T Val) {
-    if (Val == 0)
-      return 64;
-
-#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
-    return __builtin_ctzll(Val);
-#elif defined(_MSC_VER)
-    unsigned long Index;
-    _BitScanForward64(&Index, Val);
-    return Index;
-#endif
-  }
-};
-#endif
-#endif
-} // namespace detail
-
 /// Count number of 0's from the least significant bit to the most
 ///   stopping at the first 1.
 ///
@@ -148,62 +77,8 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
 template <typename T> unsigned countTrailingZeros(T Val) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val);
-}
-
-namespace detail {
-template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
-  static unsigned count(T Val) {
-    if (!Val)
-      return std::numeric_limits<T>::digits;
-
-    // Bisection method.
-    unsigned ZeroBits = 0;
-    for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
-      T Tmp = Val >> Shift;
-      if (Tmp)
-        Val = Tmp;
-      else
-        ZeroBits |= Shift;
-    }
-    return ZeroBits;
-  }
-};
-
-#if defined(__GNUC__) || defined(_MSC_VER)
-template <typename T> struct LeadingZerosCounter<T, 4> {
-  static unsigned count(T Val) {
-    if (Val == 0)
-      return 32;
-
-#if __has_builtin(__builtin_clz) || defined(__GNUC__)
-    return __builtin_clz(Val);
-#elif defined(_MSC_VER)
-    unsigned long Index;
-    _BitScanReverse(&Index, Val);
-    return Index ^ 31;
-#endif
-  }
-};
-
-#if !defined(_MSC_VER) || defined(_M_X64)
-template <typename T> struct LeadingZerosCounter<T, 8> {
-  static unsigned count(T Val) {
-    if (Val == 0)
-      return 64;
-
-#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
-    return __builtin_clzll(Val);
-#elif defined(_MSC_VER)
-    unsigned long Index;
-    _BitScanReverse64(&Index, Val);
-    return Index ^ 63;
-#endif
-  }
-};
-#endif
-#endif
-} // namespace detail
+  return llvm::countr_zero(Val);
+}
 
 /// Count number of 0's from the most significant bit to the least
 ///   stopping at the first 1.
@@ -214,7 +89,7 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
 template <typename T> unsigned countLeadingZeros(T Val) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val);
+  return llvm::countl_zero(Val);
 }
 
 /// Get the index of the first set bit starting from the least
@@ -465,7 +340,7 @@ constexpr inline bool isPowerOf2_64(uint64_t Value) {
 template <typename T> unsigned countLeadingOnes(T Value) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return countLeadingZeros<T>(~Value);
+  return llvm::countl_one<T>(Value);
 }
 
 /// Count the number of ones from the least significant bit to the first
@@ -478,7 +353,7 @@ template <typename T> unsigned countLeadingOnes(T Value) {
 template <typename T> unsigned countTrailingOnes(T Value) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return countTrailingZeros<T>(~Value);
+  return llvm::countr_one<T>(Value);
 }
 
 /// Count the number of set bits in a value.

diff  --git a/llvm/unittests/ADT/BitTest.cpp b/llvm/unittests/ADT/BitTest.cpp
index bd3e3439f1e63..7f154d75b3533 100644
--- a/llvm/unittests/ADT/BitTest.cpp
+++ b/llvm/unittests/ADT/BitTest.cpp
@@ -48,6 +48,102 @@ TEST(BitTest, HasSingleBit) {
   EXPECT_TRUE(llvm::has_single_bit(static_cast<uint16_t>(kValueS16)));
 }
 
+TEST(BitTest, CountlZero) {
+  uint8_t Z8 = 0;
+  uint16_t Z16 = 0;
+  uint32_t Z32 = 0;
+  uint64_t Z64 = 0;
+  EXPECT_EQ(8, llvm::countl_zero(Z8));
+  EXPECT_EQ(16, llvm::countl_zero(Z16));
+  EXPECT_EQ(32, llvm::countl_zero(Z32));
+  EXPECT_EQ(64, llvm::countl_zero(Z64));
+
+  uint8_t NZ8 = 42;
+  uint16_t NZ16 = 42;
+  uint32_t NZ32 = 42;
+  uint64_t NZ64 = 42;
+  EXPECT_EQ(2, llvm::countl_zero(NZ8));
+  EXPECT_EQ(10, llvm::countl_zero(NZ16));
+  EXPECT_EQ(26, llvm::countl_zero(NZ32));
+  EXPECT_EQ(58, llvm::countl_zero(NZ64));
+
+  EXPECT_EQ(8, llvm::countl_zero(0x00F000FFu));
+  EXPECT_EQ(8, llvm::countl_zero(0x00F12345u));
+  for (unsigned i = 0; i <= 30; ++i) {
+    EXPECT_EQ(int(31 - i), llvm::countl_zero(1u << i));
+  }
+
+  EXPECT_EQ(8, llvm::countl_zero(0x00F1234500F12345ULL));
+  EXPECT_EQ(1, llvm::countl_zero(1ULL << 62));
+  for (unsigned i = 0; i <= 62; ++i) {
+    EXPECT_EQ(int(63 - i), llvm::countl_zero(1ULL << i));
+  }
+}
+
+TEST(BitTest, CountrZero) {
+  uint8_t Z8 = 0;
+  uint16_t Z16 = 0;
+  uint32_t Z32 = 0;
+  uint64_t Z64 = 0;
+  EXPECT_EQ(8, llvm::countr_zero(Z8));
+  EXPECT_EQ(16, llvm::countr_zero(Z16));
+  EXPECT_EQ(32, llvm::countr_zero(Z32));
+  EXPECT_EQ(64, llvm::countr_zero(Z64));
+
+  uint8_t NZ8 = 42;
+  uint16_t NZ16 = 42;
+  uint32_t NZ32 = 42;
+  uint64_t NZ64 = 42;
+  EXPECT_EQ(1, llvm::countr_zero(NZ8));
+  EXPECT_EQ(1, llvm::countr_zero(NZ16));
+  EXPECT_EQ(1, llvm::countr_zero(NZ32));
+  EXPECT_EQ(1, llvm::countr_zero(NZ64));
+}
+
+TEST(BitTest, CountlOne) {
+  for (int i = 30; i >= 0; --i) {
+    // Start with all ones and unset some bit.
+    EXPECT_EQ(31 - i, llvm::countl_one(0xFFFFFFFF ^ (1 << i)));
+  }
+  for (int i = 62; i >= 0; --i) {
+    // Start with all ones and unset some bit.
+    EXPECT_EQ(63 - i, llvm::countl_one(0xFFFFFFFFFFFFFFFFULL ^ (1LL << i)));
+  }
+  for (int i = 30; i >= 0; --i) {
+    // Start with all ones and unset some bit.
+    EXPECT_EQ(31 - i, llvm::countl_one(0xFFFFFFFF ^ (1 << i)));
+  }
+}
+
+TEST(BitTest, CountrOne) {
+  uint8_t AllOnes8 = ~(uint8_t)0;
+  uint16_t AllOnes16 = ~(uint16_t)0;
+  uint32_t AllOnes32 = ~(uint32_t)0;
+  uint64_t AllOnes64 = ~(uint64_t)0;
+  EXPECT_EQ(8, llvm::countr_one(AllOnes8));
+  EXPECT_EQ(16, llvm::countr_one(AllOnes16));
+  EXPECT_EQ(32, llvm::countr_one(AllOnes32));
+  EXPECT_EQ(64, llvm::countr_one(AllOnes64));
+
+  uint8_t X8 = 6;
+  uint16_t X16 = 6;
+  uint32_t X32 = 6;
+  uint64_t X64 = 6;
+  EXPECT_EQ(0, llvm::countr_one(X8));
+  EXPECT_EQ(0, llvm::countr_one(X16));
+  EXPECT_EQ(0, llvm::countr_one(X32));
+  EXPECT_EQ(0, llvm::countr_one(X64));
+
+  uint8_t Y8 = 23;
+  uint16_t Y16 = 23;
+  uint32_t Y32 = 23;
+  uint64_t Y64 = 23;
+  EXPECT_EQ(3, llvm::countr_one(Y8));
+  EXPECT_EQ(3, llvm::countr_one(Y16));
+  EXPECT_EQ(3, llvm::countr_one(Y32));
+  EXPECT_EQ(3, llvm::countr_one(Y64));
+}
+
 TEST(BitTest, PopCount) {
   EXPECT_EQ(0, llvm::popcount(0U));
   EXPECT_EQ(0, llvm::popcount(0ULL));