[llvm] 83d56fb - Drop the ZeroBehavior parameter from countLeadingZeros and the like (NFC)

Wed Jan 18 19:58:50 PST 2023

Author: Kazu Hirata
Date: 2023-01-18T19:58:44-08:00
New Revision: 83d56fb17a4d78471125df4249c3557bd4ddb5c2

URL: https://github.com/llvm/llvm-project/commit/83d56fb17a4d78471125df4249c3557bd4ddb5c2
DIFF: https://github.com/llvm/llvm-project/commit/83d56fb17a4d78471125df4249c3557bd4ddb5c2.diff

LOG: Drop the ZeroBehavior parameter from countLeadingZeros and the like (NFC)

This patch drops the ZeroBehavior parameter from bit counting
functions like countLeadingZeros.  ZeroBehavior specifies the behavior
when the input to count{Leading,Trailing}Zeros is zero and when the
input to count{Leading,Trailing}Ones is all ones.

ZeroBehavior was first introduced on May 24, 2013 in commit
eb91eac9fb866ab1243366d2e238b9961895612d.  While that patch did not
state the intention, I would guess ZeroBehavior was for performance
reasons.  The x86 machines around that time required a conditional
branch to implement countLeadingZero<uint32_t> that returns the 32 on
zero:

        test    edi, edi
        je      .LBB0_2
        bsr     eax, edi
        xor     eax, 31
.LBB1_2:
        mov     eax, 32

That is, we can remove the conditional branch if we don't care about
the behavior on zero.

IIUC, Intel's Haswell architecture, launched on June 4, 2013,
introduced several bit manipulation instructions, including lzcnt and
tzcnt, which eliminated the need for the conditional branch.

I think it's time to retire ZeroBehavior as its utility is very
limited.  If you care about compilation speed, you should build LLVM
with an appropriate -march= to take advantage of lzcnt and tzcnt.
Even if not, modern host compilers should be able to optimize away
quite a few conditional branches because the input is often known to
be nonzero from dominating conditional branches.

Differential Revision: https://reviews.llvm.org/D141798

Added: 
    

Modified: 
    llvm/include/llvm/Support/MathExtras.h
    llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
    llvm/lib/Transforms/IPO/LowerTypeTests.cpp
    llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
    mlir/lib/Bytecode/Reader/BytecodeReader.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index ddde8a2607536..45bd38ac9d5be 100644

--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -41,9 +41,7 @@ enum ZeroBehavior {
   /// The returned value is undefined.
   ZB_Undefined,
   /// The returned value is numeric_limits<T>::max()
-  ZB_Max,
-  /// The returned value is numeric_limits<T>::digits
-  ZB_Width
+  ZB_Max
 };
 
 /// Mathematical constants.
@@ -84,7 +82,7 @@ constexpr float ef          = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
-  static unsigned count(T Val, ZeroBehavior) {
+  static unsigned count(T Val) {
     if (!Val)
       return std::numeric_limits<T>::digits;
     if (Val & 0x1)
@@ -108,8 +106,8 @@ template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
 
 #if defined(__GNUC__) || defined(_MSC_VER)
 template <typename T> struct TrailingZerosCounter<T, 4> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
-    if (ZB != ZB_Undefined && Val == 0)
+  static unsigned count(T Val) {
+    if (Val == 0)
       return 32;
 
 #if __has_builtin(__builtin_ctz) || defined(__GNUC__)
@@ -124,8 +122,8 @@ template <typename T> struct TrailingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct TrailingZerosCounter<T, 8> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
-    if (ZB != ZB_Undefined && Val == 0)
+  static unsigned count(T Val) {
+    if (Val == 0)
       return 64;
 
 #if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
@@ -146,18 +144,16 @@ template <typename T> struct TrailingZerosCounter<T, 8> {
 ///
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
-///   valid arguments.
-template <typename T>
-unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+/// Returns std::numeric_limits<T>::digits on an input of 0.
+template <typename T> unsigned countTrailingZeros(T Val) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
+  return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val);
 }
 
 namespace detail {
 template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
-  static unsigned count(T Val, ZeroBehavior) {
+  static unsigned count(T Val) {
     if (!Val)
       return std::numeric_limits<T>::digits;
 
@@ -176,8 +172,8 @@ template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
 
 #if defined(__GNUC__) || defined(_MSC_VER)
 template <typename T> struct LeadingZerosCounter<T, 4> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
-    if (ZB != ZB_Undefined && Val == 0)
+  static unsigned count(T Val) {
+    if (Val == 0)
       return 32;
 
 #if __has_builtin(__builtin_clz) || defined(__GNUC__)
@@ -192,8 +188,8 @@ template <typename T> struct LeadingZerosCounter<T, 4> {
 
 #if !defined(_MSC_VER) || defined(_M_X64)
 template <typename T> struct LeadingZerosCounter<T, 8> {
-  static unsigned count(T Val, ZeroBehavior ZB) {
-    if (ZB != ZB_Undefined && Val == 0)
+  static unsigned count(T Val) {
+    if (Val == 0)
       return 64;
 
 #if __has_builtin(__builtin_clzll) || defined(__GNUC__)
@@ -214,13 +210,11 @@ template <typename T> struct LeadingZerosCounter<T, 8> {
 ///
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
-///   valid arguments.
-template <typename T>
-unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
+/// Returns std::numeric_limits<T>::digits on an input of 0.
+template <typename T> unsigned countLeadingZeros(T Val) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
+  return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val);
 }
 
 /// Get the index of the first set bit starting from the least
@@ -228,13 +222,12 @@ unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
 ///
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
-///   valid arguments.
+/// \param ZB the behavior on an input of 0.
 template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
   if (ZB == ZB_Max && Val == 0)
     return std::numeric_limits<T>::max();
 
-  return countTrailingZeros(Val, ZB_Undefined);
+  return countTrailingZeros(Val);
 }
 
 /// Create a bitmask with the N right-most bits set to 1, and all other
@@ -269,16 +262,14 @@ template <typename T> T maskLeadingZeros(unsigned N) {
 ///
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
-///   valid arguments.
+/// \param ZB the behavior on an input of 0.
 template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
   if (ZB == ZB_Max && Val == 0)
     return std::numeric_limits<T>::max();
 
   // Use ^ instead of - because both gcc and llvm can remove the associated ^
   // in the __builtin_clz intrinsic on x86.
-  return countLeadingZeros(Val, ZB_Undefined) ^
-         (std::numeric_limits<T>::digits - 1);
+  return countLeadingZeros(Val) ^ (std::numeric_limits<T>::digits - 1);
 }
 
 /// Macro compressed bit reversal table for 256 bits.
@@ -470,13 +461,11 @@ constexpr inline bool isPowerOf2_64(uint64_t Value) {
 /// Ex. countLeadingOnes(0xFF0FFF00) == 8.
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of all ones. Only ZB_Width and
-/// ZB_Undefined are valid arguments.
-template <typename T>
-unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+/// Returns std::numeric_limits<T>::digits on an input of all ones.
+template <typename T> unsigned countLeadingOnes(T Value) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return countLeadingZeros<T>(~Value, ZB);
+  return countLeadingZeros<T>(~Value);
 }
 
 /// Count the number of ones from the least significant bit to the first
@@ -485,13 +474,11 @@ unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
 /// Ex. countTrailingOnes(0x00FF00FF) == 8.
 /// Only unsigned integral types are allowed.
 ///
-/// \param ZB the behavior on an input of all ones. Only ZB_Width and
-/// ZB_Undefined are valid arguments.
-template <typename T>
-unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
+/// Returns std::numeric_limits<T>::digits on an input of all ones.
+template <typename T> unsigned countTrailingOnes(T Value) {
   static_assert(std::is_unsigned_v<T>,
                 "Only unsigned integral types are allowed.");
-  return countTrailingZeros<T>(~Value, ZB);
+  return countTrailingZeros<T>(~Value);
 }
 
 /// Count the number of set bits in a value.
@@ -622,7 +609,7 @@ constexpr inline uint64_t NextPowerOf2(uint64_t A) {
 /// Essentially, it is a floor operation across the domain of powers of two.
 inline uint64_t PowerOf2Floor(uint64_t A) {
   if (!A) return 0;
-  return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
+  return 1ull << (63 - countLeadingZeros(A));
 }
 
 /// Returns the power of two which is greater than or equal to the given value.

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index e1f881944ce0a..da819b6d4a239 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -701,8 +701,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
       if ((PsInputBits & 0x7F) == 0 ||
           ((PsInputBits & 0xF) == 0 &&
            (PsInputBits >> 11 & 1)))
-        Info->markPSInputEnabled(
-          countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
+        Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr()));
     }
   }
 

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 329f08004abfb..3504a0269e822 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2451,8 +2451,7 @@ SDValue SITargetLowering::LowerFormalArguments(
       unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
       if ((PsInputBits & 0x7F) == 0 ||
           ((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
-        Info->markPSInputEnabled(
-            countTrailingZeros(Info->getPSInputAddr(), ZB_Undefined));
+        Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr()));
     }
   } else if (IsKernel) {
     assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());

diff  --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 2188cad8eeb93..8ead0b927555d 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -180,7 +180,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   // or ADDIW. If there are trailing zeros, try generating a sign extended
   // constant with no trailing zeros and use a final SLLI to restore them.
   if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) {
-    unsigned TrailingZeros = countTrailingZeros((uint64_t)Val, ZB_Undefined);
+    unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
     int64_t ShiftedVal = Val >> TrailingZeros;
     // If we can use C.LI+C.SLLI instead of LUI+ADDI(W) prefer that since
     // its more compressible. But only if LUI+ADDI(W) isn't fusable.
@@ -202,7 +202,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
   if (Val > 0 && Res.size() > 2) {
     assert(ActiveFeatures[RISCV::Feature64Bit] &&
            "Expected RV32 to only need 2 instructions");
-    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val, ZB_Undefined);
+    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
     uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
     // Fill in the bits that will be shifted out with 1s. An example where this
     // helps is trailing one masks with 32 or more ones. This will generate

diff  --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 0a16b882242d1..ddfcace6acf82 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -172,7 +172,7 @@ BitSetInfo BitSetBuilder::build() {
 
   BSI.AlignLog2 = 0;
   if (Mask != 0)
-    BSI.AlignLog2 = countTrailingZeros(Mask, ZB_Undefined);
+    BSI.AlignLog2 = countTrailingZeros(Mask);
 
   // Build the compressed bitset while normalizing the offsets against the
   // computed alignment.

diff  --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 775982c0812c7..487a0a4a97f7f 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -259,8 +259,7 @@ wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
         if (I < B.size())
           BitsUsed |= B[I];
       if (BitsUsed != 0xff)
-        return (MinByte + I) * 8 +
-               countTrailingZeros(uint8_t(~BitsUsed), ZB_Undefined);
+        return (MinByte + I) * 8 + countTrailingZeros(uint8_t(~BitsUsed));
     }
   } else {
     // Find a free (Size/8) byte region in each member of Used.

diff  --git a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
index 8dded643a45cd..53e55470fcd35 100644
--- a/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
+++ b/mlir/lib/Bytecode/Reader/BytecodeReader.cpp
@@ -281,8 +281,7 @@ class EncodingReader {
     // here because we only care about the first byte, and so that be actually
     // get ctz intrinsic calls when possible (the `uint8_t` overload uses a loop
     // implementation).
-    uint32_t numBytes =
-        llvm::countTrailingZeros<uint32_t>(result, llvm::ZB_Undefined);
+    uint32_t numBytes = llvm::countTrailingZeros<uint32_t>(result);
     assert(numBytes > 0 && numBytes <= 7 &&
            "unexpected number of trailing zeros in varint encoding");