[libc-commits] [libc] [llvm] [libc] Refactor `BigInt` (PR #86137)

Mon Mar 25 04:00:37 PDT 2024

https://github.com/gchatelet updated https://github.com/llvm/llvm-project/pull/86137

>From eef78beceb2ceda175eaf4c9f916ce28bb77aa57 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Fri, 8 Mar 2024 14:36:45 +0000
Subject: [PATCH 1/6] [libc] Refactor `BigInt`

This patch moves most of the multiprecision logic to the `multiword`
namespace and simplifies some logic in `BigInt`. It also fully
implements the mask and count functions. Additionally, many tests are
added.
---
 libc/src/__support/FPUtil/dyadic_float.h      |    6 +-
 libc/src/__support/UInt.h                     | 1056 ++++++++---------
 libc/src/__support/float_to_string.h          |    7 +-
 libc/src/__support/integer_literals.h         |    9 +
 libc/src/__support/math_extras.h              |  240 ++--
 libc/src/__support/number_pair.h              |   11 -
 libc/test/src/__support/math_extras_test.cpp  |   57 +
 libc/test/src/__support/uint_test.cpp         |  187 ++-
 .../libc/test/src/__support/BUILD.bazel       |    1 +
 9 files changed, 837 insertions(+), 737 deletions(-)

diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
index 73fd7381c3c838..e0c205f52383ba 100644
--- a/libc/src/__support/FPUtil/dyadic_float.h
+++ b/libc/src/__support/FPUtil/dyadic_float.h
@@ -58,9 +58,9 @@ template <size_t Bits> struct DyadicFloat {
   // significant bit.
   LIBC_INLINE constexpr DyadicFloat &normalize() {
     if (!mantissa.is_zero()) {
-      int shift_length = static_cast<int>(mantissa.clz());
+      int shift_length = cpp::countl_zero(mantissa);
       exponent -= shift_length;
-      mantissa.shift_left(static_cast<size_t>(shift_length));
+      mantissa <<= static_cast<size_t>(shift_length);
     }
     return *this;
   }
@@ -233,7 +233,7 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_add(DyadicFloat<Bits> a,
     result.sign = a.sign;
     result.exponent = a.exponent;
     result.mantissa = a.mantissa;
-    if (result.mantissa.add(b.mantissa)) {
+    if (result.mantissa.add_overflow(b.mantissa)) {
       // Mantissa addition overflow.
       result.shift_right(1);
       result.mantissa.val[DyadicFloat<Bits>::MantissaType::WORD_COUNT - 1] |=
diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index df01e081e3c19e..96e346915721e7 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -17,7 +17,7 @@
 #include "src/__support/macros/attributes.h"   // LIBC_INLINE
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128, LIBC_TYPES_HAS_INT64
-#include "src/__support/math_extras.h" // SumCarry, DiffBorrow
+#include "src/__support/math_extras.h" // add_with_carry, sub_with_borrow
 #include "src/__support/number_pair.h"
 
 #include <stddef.h> // For size_t
@@ -25,64 +25,275 @@
 
 namespace LIBC_NAMESPACE {
 
-namespace internal {
-template <typename T> struct half_width;
+namespace multiword {
 
-template <> struct half_width<uint64_t> : cpp::type_identity<uint32_t> {};
-template <> struct half_width<uint32_t> : cpp::type_identity<uint16_t> {};
+// A type trait mapping unsigned integers to their half-width unsigned
+// counterparts.
+template <typename T> struct half_width;
 template <> struct half_width<uint16_t> : cpp::type_identity<uint8_t> {};
+template <> struct half_width<uint32_t> : cpp::type_identity<uint16_t> {};
+#ifdef LIBC_TYPES_HAS_INT64
+template <> struct half_width<uint64_t> : cpp::type_identity<uint32_t> {};
 #ifdef LIBC_TYPES_HAS_INT128
 template <> struct half_width<__uint128_t> : cpp::type_identity<uint64_t> {};
 #endif // LIBC_TYPES_HAS_INT128
-
+#endif // LIBC_TYPES_HAS_INT64
 template <typename T> using half_width_t = typename half_width<T>::type;
 
-template <typename T> constexpr NumberPair<T> full_mul(T a, T b) {
-  NumberPair<T> pa = split(a);
-  NumberPair<T> pb = split(b);
-  NumberPair<T> prod;
+// An array of two elements that can be used in multiword operations.
+template <typename T> struct Double final : cpp::array<T, 2> {
+  using UP = cpp::array<T, 2>;
+  using UP::UP;
+  LIBC_INLINE constexpr Double(T lo, T hi) : UP({lo, hi}) {}
+};
 
-  prod.lo = pa.lo * pb.lo;                    // exact
-  prod.hi = pa.hi * pb.hi;                    // exact
-  NumberPair<T> lo_hi = split(pa.lo * pb.hi); // exact
-  NumberPair<T> hi_lo = split(pa.hi * pb.lo); // exact
+// Converts an unsigned value into a Double<half_width_t<T>>.
+template <typename T> LIBC_INLINE constexpr auto split(T value) {
+  static_assert(cpp::is_unsigned_v<T>);
+  return cpp::bit_cast<Double<half_width_t<T>>>(value);
+}
 
-  constexpr size_t HALF_BIT_WIDTH = sizeof(T) * CHAR_BIT / 2;
+// The low part of a Double value.
+template <typename T> LIBC_INLINE constexpr T lo(const Double<T> &value) {
+  return value[0];
+}
+// The high part of a Double value.
+template <typename T> LIBC_INLINE constexpr T hi(const Double<T> &value) {
+  return value[1];
+}
+// The low part of an unsigned value.
+template <typename T> LIBC_INLINE constexpr half_width_t<T> lo(T value) {
+  return lo(split(value));
+}
+// The high part of an unsigned value.
+template <typename T> LIBC_INLINE constexpr half_width_t<T> hi(T value) {
+  return hi(split(value));
+}
 
-  auto r1 = add_with_carry(prod.lo, lo_hi.lo << HALF_BIT_WIDTH, T(0));
-  prod.lo = r1.sum;
-  prod.hi = add_with_carry(prod.hi, lo_hi.hi, r1.carry).sum;
+// Returns 'a' times 'b' in a Double<word>. Cannot overflow by definition.
+template <typename word>
+LIBC_INLINE constexpr Double<word> mul2(word a, word b) {
+  if constexpr (cpp::is_same_v<word, uint8_t>) {
+    return split<uint16_t>(uint16_t(a) * uint16_t(b));
+  } else if constexpr (cpp::is_same_v<word, uint16_t>) {
+    return split<uint32_t>(uint32_t(a) * uint32_t(b));
+  }
+#ifdef LIBC_TYPES_HAS_INT64
+  else if constexpr (cpp::is_same_v<word, uint32_t>) {
+    return split<uint64_t>(uint64_t(a) * uint64_t(b));
+  }
+#endif
+#ifdef LIBC_TYPES_HAS_INT128
+  else if constexpr (cpp::is_same_v<word, uint64_t>) {
+    return split<__uint128_t>(__uint128_t(a) * __uint128_t(b));
+  }
+#endif
+  else {
+    using half_word = half_width_t<word>;
+    const auto shiftl = [](word value) -> word {
+      return value << cpp::numeric_limits<half_word>::digits;
+    };
+    const auto shiftr = [](word value) -> word {
+      return value >> cpp::numeric_limits<half_word>::digits;
+    };
+    // Here we do a one digit multiplication where 'a' and 'b' are of type
+    // word. We split 'a' and 'b' into half words and perform the classic long
+    // multiplication with 'a' and 'b' being two-digit numbers.
+
+    //    a      a_hi a_lo
+    //  x b => x b_hi b_lo
+    // ----    -----------
+    //    c         result
+    // We convert 'lo' and 'hi' from 'half_word' to 'word' so multiplication
+    // doesn't overflow.
+    const word a_lo = lo(a);
+    const word b_lo = lo(b);
+    const word a_hi = hi(a);
+    const word b_hi = hi(b);
+    const word step1 = b_lo * a_lo; // no overflow;
+    const word step2 = b_lo * a_hi; // no overflow;
+    const word step3 = b_hi * a_lo; // no overflow;
+    const word step4 = b_hi * a_hi; // no overflow;
+    word lo_digit = step1;
+    word hi_digit = step4;
+    const word zero_carry = 0;
+    word carry;
+    const auto add_with_carry = LIBC_NAMESPACE::add_with_carry<word>;
+    lo_digit = add_with_carry(lo_digit, shiftl(step2), zero_carry, &carry);
+    hi_digit = add_with_carry(hi_digit, shiftr(step2), carry, nullptr);
+    lo_digit = add_with_carry(lo_digit, shiftl(step3), zero_carry, &carry);
+    hi_digit = add_with_carry(hi_digit, shiftr(step3), carry, nullptr);
+    return Double<word>(lo_digit, hi_digit);
+  }
+}
 
-  auto r2 = add_with_carry(prod.lo, hi_lo.lo << HALF_BIT_WIDTH, T(0));
-  prod.lo = r2.sum;
-  prod.hi = add_with_carry(prod.hi, hi_lo.hi, r2.carry).sum;
+// Inplace binary operation with carry propagation. Returns carry.
+template <typename Function, typename word, size_t N, size_t M>
+LIBC_INLINE constexpr word inplace_binop(Function op_with_carry,
+                                         cpp::array<word, N> &dst,
+                                         const cpp::array<word, M> &rhs) {
+  static_assert(N >= M);
+  word carry_out = 0;
+  for (size_t i = 0; i < N; ++i) {
+    const bool has_rhs_value = i < M;
+    const word rhs_value = has_rhs_value ? rhs[i] : 0;
+    const word carry_in = carry_out;
+    dst[i] = op_with_carry(dst[i], rhs_value, carry_in, &carry_out);
+    // stop early when rhs is over and no carry is to be propagated.
+    if (!has_rhs_value && carry_out == 0)
+      break;
+  }
+  return carry_out;
+}
 
-  return prod;
+// Inplace addition. Returns carry.
+template <typename word, size_t N, size_t M>
+LIBC_INLINE constexpr word add_with_carry(cpp::array<word, N> &dst,
+                                          const cpp::array<word, M> &rhs) {
+  return inplace_binop(LIBC_NAMESPACE::add_with_carry<word>, dst, rhs);
 }
 
-template <>
-LIBC_INLINE constexpr NumberPair<uint32_t> full_mul<uint32_t>(uint32_t a,
-                                                              uint32_t b) {
-  uint64_t prod = uint64_t(a) * uint64_t(b);
-  NumberPair<uint32_t> result;
-  result.lo = uint32_t(prod);
-  result.hi = uint32_t(prod >> 32);
-  return result;
+// Inplace subtraction. Returns borrow.
+template <typename word, size_t N, size_t M>
+LIBC_INLINE constexpr word sub_with_borrow(cpp::array<word, N> &dst,
+                                           const cpp::array<word, M> &rhs) {
+  return inplace_binop(LIBC_NAMESPACE::sub_with_borrow<word>, dst, rhs);
 }
 
-#ifdef LIBC_TYPES_HAS_INT128
-template <>
-LIBC_INLINE constexpr NumberPair<uint64_t> full_mul<uint64_t>(uint64_t a,
-                                                              uint64_t b) {
-  __uint128_t prod = __uint128_t(a) * __uint128_t(b);
-  NumberPair<uint64_t> result;
-  result.lo = uint64_t(prod);
-  result.hi = uint64_t(prod >> 64);
-  return result;
+// Inplace multiply-add. Returns carry.
+// i.e., 'dst += b x c'
+template <typename word, size_t N>
+LIBC_INLINE constexpr word mad_with_carry(cpp::array<word, N> &dst, word b,
+                                          word c) {
+  return add_with_carry(dst, mul2(b, c));
 }
-#endif // LIBC_TYPES_HAS_INT128
 
-} // namespace internal
+// An array of two elements serving as an accumulator during multiword
+// computations.
+template <typename T> struct Accumulator final : cpp::array<T, 2> {
+  using UP = cpp::array<T, 2>;
+  LIBC_INLINE constexpr Accumulator() : UP({0, 0}) {}
+  LIBC_INLINE constexpr T advance(T carry_in) {
+    auto result = UP::front();
+    UP::front() = UP::back();
+    UP::back() = carry_in;
+    return result;
+  }
+  LIBC_INLINE constexpr T sum() const { return UP::front(); }
+  LIBC_INLINE constexpr T carry() const { return UP::back(); }
+};
+
+// Inplace multiplication by a single word. Returns carry.
+template <typename word, size_t N>
+LIBC_INLINE constexpr word scalar_multiply_with_carry(cpp::array<word, N> &dst,
+                                                      word x) {
+  Accumulator<word> acc;
+  for (auto &val : dst) {
+    const word carry = mad_with_carry(acc, val, x);
+    val = acc.advance(carry);
+  }
+  return acc.carry();
+}
+
+// Multiplication of 'lhs' by 'rhs' into 'dst'. Returns carry.
+// This function is safe to use for signed numbers.
+// https://stackoverflow.com/a/20793834
+// https://pages.cs.wisc.edu/%7Emarkhill/cs354/Fall2008/beyond354/int.mult.html
+template <typename word, size_t O, size_t M, size_t N>
+LIBC_INLINE constexpr word multiply_with_carry(cpp::array<word, O> &dst,
+                                               const cpp::array<word, M> &lhs,
+                                               const cpp::array<word, N> &rhs) {
+  static_assert(O >= M + N);
+  Accumulator<word> acc;
+  for (size_t i = 0; i < O; ++i) {
+    const size_t lower_idx = i < N ? 0 : i - N + 1;
+    const size_t upper_idx = i < M ? i : M - 1;
+    word carry = 0;
+    for (size_t j = lower_idx; j <= upper_idx; ++j)
+      carry += mad_with_carry(acc, lhs[j], rhs[i - j]);
+    dst[i] = acc.advance(carry);
+  }
+  return acc.carry();
+}
+
+template <typename word, size_t N>
+LIBC_INLINE constexpr void quick_mul_hi(cpp::array<word, N> &dst,
+                                        const cpp::array<word, N> &lhs,
+                                        const cpp::array<word, N> &rhs) {
+  Accumulator<word> acc;
+  word carry = 0;
+  // First round of accumulation for those at N - 1 in the full product.
+  for (size_t i = 0; i < N; ++i)
+    carry += mad_with_carry(acc, lhs[i], rhs[N - 1 - i]);
+  for (size_t i = N; i < 2 * N - 1; ++i) {
+    acc.advance(carry);
+    carry = 0;
+    for (size_t j = i - N + 1; j < N; ++j)
+      carry += mad_with_carry(acc, lhs[j], rhs[i - j]);
+    dst[i - N] = acc.sum();
+  }
+  dst.back() = acc.carry();
+}
+
+// An enum for the shift function below.
+enum Direction { LEFT, RIGHT };
+
+// A bitwise shift on an array of elements.
+template <Direction direction, typename word, size_t N>
+LIBC_INLINE constexpr void shift(cpp::array<word, N> &array, size_t offset) {
+  constexpr size_t WORD_BITS = cpp::numeric_limits<word>::digits;
+  constexpr size_t TOTAL_BITS = N * WORD_BITS;
+  if (offset == 0)
+    return;
+  if (offset >= TOTAL_BITS) {
+    array = {};
+    return;
+  }
+  const auto at = [&](size_t index) -> int {
+    // reverse iteration when direction == LEFT.
+    if constexpr (direction == LEFT)
+      return int(N) - int(index) - 1;
+    return int(index);
+  };
+  const auto safe_get_at = [&](size_t index) -> word {
+    // return 0 when accessing out of bound elements.
+    const int i = at(index);
+    return i >= 0 && i < int(N) ? array[i] : 0;
+  };
+  const size_t index_offset = offset / WORD_BITS;
+  const size_t bit_offset = offset % WORD_BITS;
+  for (size_t index = 0; index < N; ++index) {
+    const word part1 = safe_get_at(index + index_offset);
+    const word part2 = safe_get_at(index + index_offset + 1);
+    word &dst = array[at(index)];
+    if (bit_offset == 0)
+      dst = part1; // no crosstalk between parts.
+    else if constexpr (direction == RIGHT)
+      dst = (part1 >> bit_offset) | (part2 << (WORD_BITS - bit_offset));
+    else if constexpr (direction == LEFT)
+      dst = (part1 << bit_offset) | (part2 >> (WORD_BITS - bit_offset));
+  }
+}
+
+#define DECLARE_COUNTBIT(NAME, INDEX_EXPR)                                     \
+  template <typename word, size_t N>                                           \
+  LIBC_INLINE constexpr int NAME(const cpp::array<word, N> &val) {             \
+    int bit_count = 0;                                                         \
+    for (size_t i = 0; i < N; ++i) {                                           \
+      const int word_count = cpp::NAME<word>(val[INDEX_EXPR]);                 \
+      bit_count += word_count;                                                 \
+      if (word_count != cpp::numeric_limits<word>::digits)                     \
+        break;                                                                 \
+    }                                                                          \
+    return bit_count;                                                          \
+  }
+
+DECLARE_COUNTBIT(countr_zero, i)         // iterating forward
+DECLARE_COUNTBIT(countr_one, i)          // iterating forward
+DECLARE_COUNTBIT(countl_zero, N - i - 1) // iterating backward
+DECLARE_COUNTBIT(countl_one, N - i - 1)  // iterating backward
+
+} // namespace multiword
 
 template <size_t Bits, bool Signed, typename WordType = uint64_t>
 struct BigInt {
@@ -90,6 +301,9 @@ struct BigInt {
                 "WordType must be unsigned integer.");
 
   using word_type = WordType;
+  using unsigned_type = BigInt<Bits, false, word_type>;
+  using signed_type = BigInt<Bits, true, word_type>;
+
   LIBC_INLINE_VAR static constexpr bool SIGNED = Signed;
   LIBC_INLINE_VAR static constexpr size_t BITS = Bits;
   LIBC_INLINE_VAR
@@ -100,10 +314,7 @@ struct BigInt {
 
   LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE;
 
-  using unsigned_type = BigInt<BITS, false, word_type>;
-  using signed_type = BigInt<BITS, true, word_type>;
-
-  cpp::array<WordType, WORD_COUNT> val{};
+  cpp::array<WordType, WORD_COUNT> val{}; // zero initialized.
 
   LIBC_INLINE constexpr BigInt() = default;
 
@@ -112,76 +323,67 @@ struct BigInt {
   template <size_t OtherBits, bool OtherSigned>
   LIBC_INLINE constexpr BigInt(
       const BigInt<OtherBits, OtherSigned, WordType> &other) {
-    if (OtherBits >= Bits) {
+    if (OtherBits >= Bits) { // truncate
       for (size_t i = 0; i < WORD_COUNT; ++i)
         val[i] = other[i];
-    } else {
+    } else { // zero or sign extend
       size_t i = 0;
       for (; i < OtherBits / WORD_SIZE; ++i)
         val[i] = other[i];
-      WordType sign = 0;
-      if constexpr (Signed && OtherSigned) {
-        sign = static_cast<WordType>(
-            -static_cast<cpp::make_signed_t<WordType>>(other.is_neg()));
-      }
-      for (; i < WORD_COUNT; ++i)
-        val[i] = sign;
+      extend(i, Signed && other.is_neg());
     }
   }
 
   // Construct a BigInt from a C array.
-  template <size_t N, cpp::enable_if_t<N <= WORD_COUNT, int> = 0>
-  LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) {
-    size_t min_wordcount = N < WORD_COUNT ? N : WORD_COUNT;
-    size_t i = 0;
-    for (; i < min_wordcount; ++i)
+  template <size_t N> LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) {
+    static_assert(N == WORD_COUNT);
+    for (size_t i = 0; i < WORD_COUNT; ++i)
       val[i] = nums[i];
+  }
 
-    // If nums doesn't completely fill val, then fill the rest with zeroes.
-    for (; i < WORD_COUNT; ++i)
-      val[i] = 0;
+  LIBC_INLINE constexpr explicit BigInt(
+      const cpp::array<WordType, WORD_COUNT> &words) {
+    val = words;
   }
 
   // Initialize the first word to |v| and the rest to 0.
   template <typename T, typename = cpp::enable_if_t<cpp::is_integral_v<T>>>
   LIBC_INLINE constexpr BigInt(T v) {
-    val[0] = static_cast<WordType>(v);
-
-    if constexpr (WORD_COUNT == 1)
-      return;
-
-    if constexpr (Bits < sizeof(T) * CHAR_BIT) {
-      for (int i = 1; i < WORD_COUNT; ++i) {
-        v >>= WORD_SIZE;
-        val[i] = static_cast<WordType>(v);
+    constexpr size_t T_SIZE = sizeof(T) * CHAR_BIT;
+    const bool is_neg = Signed && (v < 0);
+    for (size_t i = 0; i < WORD_COUNT; ++i) {
+      if (v == 0) {
+        extend(i, is_neg);
+        return;
       }
-      return;
-    }
-
-    size_t i = 1;
-
-    if constexpr (WORD_SIZE < sizeof(T) * CHAR_BIT)
-      for (; i < sizeof(T) * CHAR_BIT / WORD_SIZE; ++i) {
+      val[i] = static_cast<WordType>(v);
+      if constexpr (T_SIZE > WORD_SIZE)
         v >>= WORD_SIZE;
-        val[i] = static_cast<WordType>(v);
-      }
-
-    WordType sign = (Signed && (v < 0)) ? ~WordType(0) : WordType(0);
-    for (; i < WORD_COUNT; ++i) {
-      val[i] = sign;
+      else
+        v = 0;
     }
   }
+  LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default;
 
-  LIBC_INLINE constexpr explicit BigInt(
-      const cpp::array<WordType, WORD_COUNT> &words) {
-    for (size_t i = 0; i < WORD_COUNT; ++i)
-      val[i] = words[i];
+  // constants
+  LIBC_INLINE static constexpr BigInt zero() { return BigInt(); }
+  LIBC_INLINE static constexpr BigInt one() { return BigInt(1); }
+  LIBC_INLINE static constexpr BigInt all_ones() { return ~zero(); }
+  LIBC_INLINE static constexpr BigInt min() {
+    BigInt out;
+    if constexpr (SIGNED)
+      out.set_msb();
+    return out;
+  }
+  LIBC_INLINE static constexpr BigInt max() {
+    BigInt out = all_ones();
+    if constexpr (SIGNED)
+      out.clear_msb();
+    return out;
   }
 
   // TODO: Reuse the Sign type.
-  LIBC_INLINE constexpr bool is_neg() const {
-    return val.back() >> (WORD_SIZE - 1);
-  }
+  LIBC_INLINE constexpr bool is_neg() const { return SIGNED && get_msb(); }
 
   template <typename T> LIBC_INLINE constexpr explicit operator T() const {
     return to<T>();
@@ -191,200 +393,100 @@ struct BigInt {
   LIBC_INLINE constexpr cpp::enable_if_t<
       cpp::is_integral_v<T> && !cpp::is_same_v<T, bool>, T>
   to() const {
+    constexpr size_t T_SIZE = sizeof(T) * CHAR_BIT;
     T lo = static_cast<T>(val[0]);
-
-    constexpr size_t T_BITS = sizeof(T) * CHAR_BIT;
-
-    if constexpr (T_BITS <= WORD_SIZE)
+    if constexpr (T_SIZE <= WORD_SIZE)
       return lo;
-
     constexpr size_t MAX_COUNT =
-        T_BITS > Bits ? WORD_COUNT : T_BITS / WORD_SIZE;
+        T_SIZE > Bits ? WORD_COUNT : T_SIZE / WORD_SIZE;
     for (size_t i = 1; i < MAX_COUNT; ++i)
       lo += static_cast<T>(val[i]) << (WORD_SIZE * i);
-
-    if constexpr (Signed && (T_BITS > Bits)) {
+    if constexpr (Signed && (T_SIZE > Bits)) {
       // Extend sign for negative numbers.
       constexpr T MASK = (~T(0) << Bits);
       if (is_neg())
         lo |= MASK;
     }
-
     return lo;
   }
 
   LIBC_INLINE constexpr explicit operator bool() const { return !is_zero(); }
 
-  LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default;
-
   LIBC_INLINE constexpr bool is_zero() const {
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      if (val[i] != 0)
+    for (auto part : val)
+      if (part != 0)
         return false;
-    }
     return true;
   }
 
-  // Add x to this number and store the result in this number.
+  // Add 'rhs' to this number and store the result in this number.
   // Returns the carry value produced by the addition operation.
-  LIBC_INLINE constexpr WordType add(const BigInt &x) {
-    SumCarry<WordType> s{0, 0};
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      s = add_with_carry(val[i], x.val[i], s.carry);
-      val[i] = s.sum;
-    }
-    return s.carry;
+  LIBC_INLINE constexpr WordType add_overflow(const BigInt &rhs) {
+    return multiword::add_with_carry(val, rhs.val);
   }
 
   LIBC_INLINE constexpr BigInt operator+(const BigInt &other) const {
-    BigInt result;
-    SumCarry<WordType> s{0, 0};
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      s = add_with_carry(val[i], other.val[i], s.carry);
-      result.val[i] = s.sum;
-    }
+    BigInt result = *this;
+    result.add_overflow(other);
     return result;
   }
 
   // This will only apply when initializing a variable from constant values, so
   // it will always use the constexpr version of add_with_carry.
   LIBC_INLINE constexpr BigInt operator+(BigInt &&other) const {
-    BigInt result;
-    SumCarry<WordType> s{0, 0};
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      s = add_with_carry(val[i], other.val[i], s.carry);
-      result.val[i] = s.sum;
-    }
-    return result;
+    // We use addition commutativity to reuse 'other' and prevent allocation.
+    other.add_overflow(*this); // Returned carry value is ignored.
+    return other;
   }
 
   LIBC_INLINE constexpr BigInt &operator+=(const BigInt &other) {
-    add(other); // Returned carry value is ignored.
+    add_overflow(other); // Returned carry value is ignored.
     return *this;
   }
 
-  // Subtract x to this number and store the result in this number.
+  // Subtract 'rhs' from this number and store the result in this number.
   // Returns the carry value produced by the subtraction operation.
-  LIBC_INLINE constexpr WordType sub(const BigInt &x) {
-    DiffBorrow<WordType> d{0, 0};
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      d = sub_with_borrow(val[i], x.val[i], d.borrow);
-      val[i] = d.diff;
-    }
-    return d.borrow;
+  LIBC_INLINE constexpr WordType sub_overflow(const BigInt &rhs) {
+    return multiword::sub_with_borrow(val, rhs.val);
   }
 
   LIBC_INLINE constexpr BigInt operator-(const BigInt &other) const {
-    BigInt result;
-    DiffBorrow<WordType> d{0, 0};
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      d = sub_with_borrow(val[i], other.val[i], d.borrow);
-      result.val[i] = d.diff;
-    }
+    BigInt result = *this;
+    result.sub_overflow(other); // Returned carry value is ignored.
     return result;
   }
 
   LIBC_INLINE constexpr BigInt operator-(BigInt &&other) const {
-    BigInt result;
-    DiffBorrow<WordType> d{0, 0};
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      d = sub_with_borrow(val[i], other.val[i], d.borrow);
-      result.val[i] = d.diff;
-    }
+    BigInt result = *this;
+    result.sub_overflow(other); // Returned carry value is ignored.
     return result;
   }
 
   LIBC_INLINE constexpr BigInt &operator-=(const BigInt &other) {
     // TODO(lntue): Set overflow flag / errno when carry is true.
-    sub(other);
+    sub_overflow(other); // Returned carry value is ignored.
     return *this;
   }
 
-  // Multiply this number with x and store the result in this number. It is
-  // implemented using the long multiplication algorithm by splitting the
-  // 64-bit words of this number and |x| in to 32-bit halves but peforming
-  // the operations using 64-bit numbers. This ensures that we don't lose the
-  // carry bits.
-  // Returns the carry value produced by the multiplication operation.
+  // Multiply this number with x and store the result in this number.
   LIBC_INLINE constexpr WordType mul(WordType x) {
-    BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0);
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      NumberPair<WordType> prod = internal::full_mul(val[i], x);
-      BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi});
-      const WordType carry = partial_sum.add(tmp);
-      val[i] = partial_sum.val[0];
-      partial_sum.val[0] = partial_sum.val[1];
-      partial_sum.val[1] = carry;
-    }
-    return partial_sum.val[1];
+    return multiword::scalar_multiply_with_carry(val, x);
   }
 
-  LIBC_INLINE constexpr BigInt operator*(const BigInt &other) const {
-    if constexpr (Signed) {
-      BigInt<Bits, false, WordType> a(*this);
-      BigInt<Bits, false, WordType> b(other);
-      const bool a_neg = a.is_neg();
-      const bool b_neg = b.is_neg();
-      if (a_neg)
-        a = -a;
-      if (b_neg)
-        b = -b;
-      BigInt<Bits, false, WordType> prod = a * b;
-      if (a_neg != b_neg)
-        prod = -prod;
-      return static_cast<BigInt<Bits, true, WordType>>(prod);
-    } else {
-      if constexpr (WORD_COUNT == 1) {
-        return {val[0] * other.val[0]};
-      } else {
-        BigInt result(0);
-        BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0);
-        WordType carry = 0;
-        for (size_t i = 0; i < WORD_COUNT; ++i) {
-          for (size_t j = 0; j <= i; j++) {
-            NumberPair<WordType> prod =
-                internal::full_mul(val[j], other.val[i - j]);
-            BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi});
-            carry += partial_sum.add(tmp);
-          }
-          result.val[i] = partial_sum.val[0];
-          partial_sum.val[0] = partial_sum.val[1];
-          partial_sum.val[1] = carry;
-          carry = 0;
-        }
-        return result;
-      }
-    }
-  }
-
-  // Return the full product, only unsigned for now.
+  // Return the full product.
   template <size_t OtherBits>
-  LIBC_INLINE constexpr BigInt<Bits + OtherBits, Signed, WordType>
+  LIBC_INLINE constexpr auto
   ful_mul(const BigInt<OtherBits, Signed, WordType> &other) const {
-    BigInt<Bits + OtherBits, Signed, WordType> result(0);
-    BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0);
-    WordType carry = 0;
-    constexpr size_t OTHER_WORDCOUNT =
-        BigInt<OtherBits, Signed, WordType>::WORD_COUNT;
-    for (size_t i = 0; i <= WORD_COUNT + OTHER_WORDCOUNT - 2; ++i) {
-      const size_t lower_idx =
-          i < OTHER_WORDCOUNT ? 0 : i - OTHER_WORDCOUNT + 1;
-      const size_t upper_idx = i < WORD_COUNT ? i : WORD_COUNT - 1;
-      for (size_t j = lower_idx; j <= upper_idx; ++j) {
-        NumberPair<WordType> prod =
-            internal::full_mul(val[j], other.val[i - j]);
-        BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi});
-        carry += partial_sum.add(tmp);
-      }
-      result.val[i] = partial_sum.val[0];
-      partial_sum.val[0] = partial_sum.val[1];
-      partial_sum.val[1] = carry;
-      carry = 0;
-    }
-    result.val[WORD_COUNT + OTHER_WORDCOUNT - 1] = partial_sum.val[0];
+    BigInt<Bits + OtherBits, Signed, WordType> result;
+    multiword::multiply_with_carry(result.val, val, other.val);
     return result;
   }
 
+  LIBC_INLINE constexpr BigInt operator*(const BigInt &other) const {
+    // Perform full mul and truncate.
+    return BigInt(ful_mul(other));
+  }
+
   // Fast hi part of the full product.  The normal product `operator*` returns
   // `Bits` least significant bits of the full product, while this function will
   // approximate `Bits` most significant bits of the full product with errors
@@ -407,39 +509,17 @@ struct BigInt {
   //    256      4        16          10            3
   //    512      8        64          36            7
   LIBC_INLINE constexpr BigInt quick_mul_hi(const BigInt &other) const {
-    BigInt result(0);
-    BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0);
-    WordType carry = 0;
-    // First round of accumulation for those at WORD_COUNT - 1 in the full
-    // product.
-    for (size_t i = 0; i < WORD_COUNT; ++i) {
-      NumberPair<WordType> prod =
-          internal::full_mul(val[i], other.val[WORD_COUNT - 1 - i]);
-      BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi});
-      carry += partial_sum.add(tmp);
-    }
-    for (size_t i = WORD_COUNT; i < 2 * WORD_COUNT - 1; ++i) {
-      partial_sum.val[0] = partial_sum.val[1];
-      partial_sum.val[1] = carry;
-      carry = 0;
-      for (size_t j = i - WORD_COUNT + 1; j < WORD_COUNT; ++j) {
-        NumberPair<WordType> prod =
-            internal::full_mul(val[j], other.val[i - j]);
-        BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi});
-        carry += partial_sum.add(tmp);
-      }
-      result.val[i - WORD_COUNT] = partial_sum.val[0];
-    }
-    result.val[WORD_COUNT - 1] = partial_sum.val[1];
+    BigInt result;
+    multiword::quick_mul_hi(result.val, val, other.val);
     return result;
   }
 
-  // pow takes a power and sets this to its starting value to that power. Zero
-  // to the zeroth power returns 1.
+  // BigInt(x).pow_n(n) computes x ^ n.
+  // Note 0 ^ 0 == 1.
   LIBC_INLINE constexpr void pow_n(uint64_t power) {
-    BigInt result = 1;
+    static_assert(!Signed);
+    BigInt result = one();
     BigInt cur_power = *this;
-
     while (power > 0) {
       if ((power % 2) > 0)
         result *= cur_power;
@@ -449,38 +529,63 @@ struct BigInt {
     *this = result;
   }
 
-  // TODO: Make division work correctly for signed integers.
-
-  // div takes another BigInt of the same size and divides this by it. The value
-  // of this will be set to the quotient, and the return value is the remainder.
-  LIBC_INLINE constexpr cpp::optional<BigInt> div(const BigInt &other) {
-    BigInt remainder(0);
-    if (*this < other) {
-      remainder = *this;
-      *this = BigInt(0);
-      return remainder;
-    }
-    if (other == 1) {
-      return remainder;
-    }
-    if (other == 0) {
-      return cpp::nullopt;
-    }
-
-    BigInt quotient(0);
-    BigInt subtractor = other;
-    int cur_bit = static_cast<int>(subtractor.clz() - this->clz());
-    subtractor.shift_left(cur_bit);
-
-    for (; cur_bit >= 0 && *this > 0; --cur_bit, subtractor.shift_right(1)) {
-      if (*this >= subtractor) {
-        this->sub(subtractor);
-        quotient = quotient | (BigInt(1) << cur_bit);
+  // Performs in-place signed / unsigned division: `*this` becomes the
+  // quotient. Returns the remainder, or cpp::nullopt when dividing by zero.
+  // For signed numbers it behaves like C++ signed integer division,
+  // that is, by truncating the fractional part.
+  // https://stackoverflow.com/a/3602857
+  LIBC_INLINE constexpr cpp::optional<BigInt> div(const BigInt &divider) {
+    auto &dividend = *this;
+    if constexpr (SIGNED) {
+      // Signed Multiword Division
+      // 1. Convert the dividend and divisor to unsigned representation.
+      // 2. Negate the dividend if it is negative, and similarly for the
+      // divisor.
+      // 3. Use unsigned multiword division algorithm.
+      // 4. Convert the quotient and remainder to signed representation.
+      // 5. Negate the quotient if the dividend and divisor had opposite signs.
+      // 6. Negate the remainder if the dividend was negative.
+      const bool dividend_is_neg = dividend.is_neg();
+      const bool divider_is_neg = divider.is_neg();
+      unsigned_type udividend(dividend);
+      unsigned_type udivider(divider);
+      if (dividend_is_neg)
+        udividend.negate();
+      if (divider_is_neg)
+        udivider.negate();
+      auto maybe_remainder = udividend.div(udivider);
+      if (!maybe_remainder)
+        return cpp::nullopt;
+      unsigned_type remainder = *maybe_remainder;
+      if (dividend_is_neg != divider_is_neg)
+        udividend.negate(); // this is the quotient
+      dividend = signed_type(udividend);
+      if (dividend_is_neg)
+        remainder.negate();
+      return signed_type(remainder);
+    } else {
+      BigInt remainder;
+      if (divider.is_zero())
+        return cpp::nullopt;
+      if (divider == 1)
+        return remainder; // *this already holds the quotient
+      BigInt quotient;
+      if (dividend >= divider) {
+        BigInt subtractor = divider;
+        int cur_bit = multiword::countl_zero(subtractor.val) -
+                      multiword::countl_zero(dividend.val);
+        subtractor <<= cur_bit; // line up the leading set bits
+        for (; cur_bit >= 0 && dividend > 0; --cur_bit, subtractor >>= 1) {
+          if (dividend < subtractor)
+            continue;
+          dividend -= subtractor;
+          quotient.set_bit(cur_bit);
+        }
       }
+      remainder = dividend;
+      dividend = quotient;
+      return remainder;
     }
-    remainder = *this;
-    *this = quotient;
-    return remainder;
   }
 
   // Efficiently perform BigInt / (x * 2^e), where x is a half-word-size
@@ -496,19 +601,16 @@ struct BigInt {
   // computation of each step is now properly contained within WordType.
   //   And finally we perform some extra alignment steps for the remaining bits.
   LIBC_INLINE constexpr cpp::optional<BigInt>
-  div_uint_half_times_pow_2(internal::half_width_t<WordType> x, size_t e) {
-    BigInt remainder(0);
-
-    if (x == 0) {
+  div_uint_half_times_pow_2(multiword::half_width_t<WordType> x, size_t e) {
+    BigInt remainder;
+    if (x == 0)
       return cpp::nullopt;
-    }
     if (e >= Bits) {
       remainder = *this;
-      *this = BigInt<Bits, false, WordType>(0);
+      *this = BigInt<Bits, false, WordType>();
       return remainder;
     }
-
-    BigInt quotient(0);
+    BigInt quotient;
     WordType x_word = static_cast<WordType>(x);
     constexpr size_t LOG2_WORD_SIZE = cpp::bit_width(WORD_SIZE) - 1;
     constexpr size_t HALF_WORD_SIZE = WORD_SIZE >> 1;
@@ -633,191 +735,28 @@ struct BigInt {
     return *this;
   }
 
-  // TODO: remove and use cpp::countl_zero below.
-  [[nodiscard]] LIBC_INLINE constexpr int clz() const {
-    constexpr int word_digits = cpp::numeric_limits<word_type>::digits;
-    int leading_zeroes = 0;
-    for (auto i = val.size(); i > 0;) {
-      --i;
-      const int zeroes = cpp::countl_zero(val[i]);
-      leading_zeroes += zeroes;
-      if (zeroes != word_digits)
-        break;
-    }
-    return leading_zeroes;
-  }
-
-  // TODO: remove and use cpp::countr_zero below.
-  [[nodiscard]] LIBC_INLINE constexpr int ctz() const {
-    constexpr int word_digits = cpp::numeric_limits<word_type>::digits;
-    int trailing_zeroes = 0;
-    for (auto word : val) {
-      const int zeroes = cpp::countr_zero(word);
-      trailing_zeroes += zeroes;
-      if (zeroes != word_digits)
-        break;
-    }
-    return trailing_zeroes;
-  }
-
-  LIBC_INLINE constexpr void shift_left(size_t s) {
-    if constexpr (Bits == WORD_SIZE) {
-      // Use native types if possible.
-      if (s >= WORD_SIZE) {
-        val[0] = 0;
-        return;
-      }
-      val[0] <<= s;
-      return;
-    }
-    if constexpr ((Bits == 64) && (WORD_SIZE == 32)) {
-      // Use builtin 64 bits for 32-bit base type if available;
-      if (s >= 64) {
-        val[0] = 0;
-        val[1] = 0;
-        return;
-      }
-      uint64_t tmp = uint64__t(val[0]) + (uint64_t(val[1]) << 62);
-      tmp <<= s;
-      val[0] = uint32_t(tmp);
-      val[1] = uint32_t(tmp >> 32);
-      return;
-    }
-#ifdef LIBC_TYPES_HAS_INT128
-    if constexpr ((Bits == 128) && (WORD_SIZE == 64)) {
-      // Use builtin 128 bits if available;
-      if (s >= 128) {
-        val[0] = 0;
-        val[1] = 0;
-        return;
-      }
-      __uint128_t tmp = __uint128_t(val[0]) + (__uint128_t(val[1]) << 64);
-      tmp <<= s;
-      val[0] = uint64_t(tmp);
-      val[1] = uint64_t(tmp >> 64);
-      return;
-    }
-#endif // LIBC_TYPES_HAS_INT128
-    if (LIBC_UNLIKELY(s == 0))
-      return;
-
-    const size_t drop = s / WORD_SIZE;  // Number of words to drop
-    const size_t shift = s % WORD_SIZE; // Bits to shift in the remaining words.
-    size_t i = WORD_COUNT;
-
-    if (drop < WORD_COUNT) {
-      i = WORD_COUNT - 1;
-      if (shift > 0) {
-        for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) {
-          val[i] = (val[j] << shift) | (val[j - 1] >> (WORD_SIZE - shift));
-        }
-        val[i] = val[0] << shift;
-      } else {
-        for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) {
-          val[i] = val[j];
-        }
-        val[i] = val[0];
-      }
-    }
-
-    for (size_t j = 0; j < i; ++j) {
-      val[j] = 0;
-    }
+  LIBC_INLINE constexpr BigInt &operator<<=(size_t s) {
+    multiword::shift<multiword::LEFT>(val, s);
+    return *this;
   }
 
   LIBC_INLINE constexpr BigInt operator<<(size_t s) const {
     BigInt result(*this);
-    result.shift_left(s);
+    result <<= s;
     return result;
   }
 
-  LIBC_INLINE constexpr BigInt &operator<<=(size_t s) {
-    shift_left(s);
+  LIBC_INLINE constexpr BigInt &operator>>=(size_t s) {
+    multiword::shift<multiword::RIGHT>(val, s);
     return *this;
   }
 
-  LIBC_INLINE constexpr void shift_right(size_t s) {
-    if constexpr ((Bits == 64) && (WORD_SIZE == 32)) {
-      // Use builtin 64 bits if available;
-      if (s >= 64) {
-        val[0] = 0;
-        val[1] = 0;
-        return;
-      }
-      uint64_t tmp = uint64_t(val[0]) + (uint64_t(val[1]) << 32);
-      if constexpr (Signed) {
-        tmp = static_cast<uint64_t>(static_cast<int64_t>(tmp) >> s);
-      } else {
-        tmp >>= s;
-      }
-      val[0] = uint32_t(tmp);
-      val[1] = uint32_t(tmp >> 32);
-      return;
-    }
-#ifdef LIBC_TYPES_HAS_INT128
-    if constexpr ((Bits == 128) && (WORD_SIZE == 64)) {
-      // Use builtin 128 bits if available;
-      if (s >= 128) {
-        val[0] = 0;
-        val[1] = 0;
-        return;
-      }
-      __uint128_t tmp = __uint128_t(val[0]) + (__uint128_t(val[1]) << 64);
-      if constexpr (Signed) {
-        tmp = static_cast<__uint128_t>(static_cast<__int128_t>(tmp) >> s);
-      } else {
-        tmp >>= s;
-      }
-      val[0] = uint64_t(tmp);
-      val[1] = uint64_t(tmp >> 64);
-      return;
-    }
-#endif // LIBC_TYPES_HAS_INT128
-
-    if (LIBC_UNLIKELY(s == 0))
-      return;
-    const size_t drop = s / WORD_SIZE;  // Number of words to drop
-    const size_t shift = s % WORD_SIZE; // Bit shift in the remaining words.
-
-    size_t i = 0;
-    WordType sign = Signed ? is_neg() : 0;
-
-    if (drop < WORD_COUNT) {
-      if (shift > 0) {
-        for (size_t j = drop; j < WORD_COUNT - 1; ++i, ++j) {
-          val[i] = (val[j] >> shift) | (val[j + 1] << (WORD_SIZE - shift));
-        }
-        if constexpr (Signed) {
-          val[i] = static_cast<WordType>(
-              static_cast<cpp::make_signed_t<WordType>>(val[WORD_COUNT - 1]) >>
-              shift);
-        } else {
-          val[i] = val[WORD_COUNT - 1] >> shift;
-        }
-        ++i;
-      } else {
-        for (size_t j = drop; j < WORD_COUNT; ++i, ++j) {
-          val[i] = val[j];
-        }
-      }
-    }
-
-    for (; i < WORD_COUNT; ++i) {
-      val[i] = sign;
-    }
-  }
-
   LIBC_INLINE constexpr BigInt operator>>(size_t s) const {
     BigInt result(*this);
-    result.shift_right(s);
+    result >>= s;
     return result;
   }
 
-  LIBC_INLINE constexpr BigInt &operator>>=(size_t s) {
-    shift_right(s);
-    return *this;
-  }
-
 #define DEFINE_BINOP(OP)                                                       \
   LIBC_INLINE friend constexpr BigInt operator OP(const BigInt &lhs,           \
                                                   const BigInt &rhs) {         \
@@ -833,10 +772,9 @@ struct BigInt {
     return lhs;                                                                \
   }
 
-  DEFINE_BINOP(&)
-  DEFINE_BINOP(|)
-  DEFINE_BINOP(^)
-
+  DEFINE_BINOP(&) // & and &=
+  DEFINE_BINOP(|) // | and |=
+  DEFINE_BINOP(^) // ^ and ^=
 #undef DEFINE_BINOP
 
   LIBC_INLINE constexpr BigInt operator~() const {
@@ -847,8 +785,8 @@ struct BigInt {
   }
 
   LIBC_INLINE constexpr BigInt operator-() const {
-    BigInt result = ~(*this);
-    result.add(BigInt(1));
+    BigInt result(*this);
+    result.negate();
     return result;
   }
 
@@ -865,24 +803,6 @@ struct BigInt {
     return !(lhs == rhs);
   }
 
-private:
-  LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) {
-    const auto compare = [](WordType a, WordType b) {
-      return a == b ? 0 : a > b ? 1 : -1;
-    };
-    if constexpr (Signed) {
-      const bool lhs_is_neg = lhs.is_neg();
-      const bool rhs_is_neg = rhs.is_neg();
-      if (lhs_is_neg != rhs_is_neg)
-        return rhs_is_neg ? 1 : -1;
-    }
-    for (size_t i = WORD_COUNT; i-- > 0;)
-      if (auto cmp = compare(lhs[i], rhs[i]); cmp != 0)
-        return cmp;
-    return 0;
-  }
-
-public:
   LIBC_INLINE friend constexpr bool operator>(const BigInt &lhs,
                                               const BigInt &rhs) {
     return cmp(lhs, rhs) > 0;
@@ -901,24 +821,24 @@ struct BigInt {
   }
 
   LIBC_INLINE constexpr BigInt &operator++() {
-    add(BigInt(1));
+    increment();
     return *this;
   }
 
   LIBC_INLINE constexpr BigInt operator++(int) {
     BigInt oldval(*this);
-    add(BigInt(1));
+    increment();
     return oldval;
   }
 
   LIBC_INLINE constexpr BigInt &operator--() {
-    sub(BigInt(1));
+    decrement();
     return *this;
   }
 
   LIBC_INLINE constexpr BigInt operator--(int) {
     BigInt oldval(*this);
-    sub(BigInt(1));
+    decrement();
     return oldval;
   }
 
@@ -930,9 +850,67 @@ struct BigInt {
   // Return the i-th word of the number.
   LIBC_INLINE constexpr WordType &operator[](size_t i) { return val[i]; }
 
-  LIBC_INLINE WordType *data() { return val; }
+private:
+  LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) {
+    const auto compare = [](WordType a, WordType b) {
+      return a == b ? 0 : a > b ? 1 : -1;
+    };
+    if constexpr (Signed) {
+      const bool lhs_is_neg = lhs.is_neg();
+      const bool rhs_is_neg = rhs.is_neg();
+      if (lhs_is_neg != rhs_is_neg)
+        return rhs_is_neg ? 1 : -1;
+    }
+    for (size_t i = WORD_COUNT; i-- > 0;)
+      if (auto cmp = compare(lhs[i], rhs[i]); cmp != 0)
+        return cmp;
+    return 0;
+  }
+
+  LIBC_INLINE constexpr void bitwise_not() {
+    for (auto &part : val)
+      part = ~part;
+  }
+
+  LIBC_INLINE constexpr void negate() {
+    bitwise_not();
+    increment();
+  }
+
+  LIBC_INLINE constexpr void increment() {
+    multiword::add_with_carry(val, cpp::array<WordType, 1>{1});
+  }
+
+  LIBC_INLINE constexpr void decrement() { // mirror of increment(): *this -= 1
+    multiword::sub_with_borrow(val, cpp::array<WordType, 1>{1});
+  }
 
-  LIBC_INLINE const WordType *data() const { return val; }
+  LIBC_INLINE constexpr void extend(size_t index, bool is_neg) {
+    const WordType value = is_neg ? cpp::numeric_limits<WordType>::max()
+                                  : cpp::numeric_limits<WordType>::min();
+    for (size_t i = index; i < WORD_COUNT; ++i)
+      val[i] = value;
+  }
+
+  LIBC_INLINE constexpr bool get_msb() const {
+    return val.back() >> (WORD_SIZE - 1);
+  }
+
+  LIBC_INLINE constexpr void set_msb() {
+    val.back() |= mask_leading_ones<WordType, 1>();
+  }
+
+  LIBC_INLINE constexpr void clear_msb() {
+    val.back() &= mask_trailing_ones<WordType, WORD_SIZE - 1>();
+  }
+
+  LIBC_INLINE constexpr void set_bit(size_t i) {
+    const size_t word_index = i / WORD_SIZE;
+    val[word_index] |= WordType(1) << (i % WORD_SIZE);
+  }
+
+  friend signed_type;
+  friend unsigned_type;
 };
 
 namespace internal {
@@ -962,10 +940,8 @@ using Int = BigInt<Bits, true, internal::WordTypeSelectorT<Bits>>;
 // Provides limits of U/Int<128>.
 template <> class cpp::numeric_limits<UInt<128>> {
 public:
-  LIBC_INLINE static constexpr UInt<128> max() {
-    return UInt<128>({0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff});
-  }
-  LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>(0); }
+  LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); }
+  LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); }
   // Meant to match std::numeric_limits interface.
   // NOLINTNEXTLINE(readability-identifier-naming)
   LIBC_INLINE_VAR static constexpr int digits = 128;
@@ -973,12 +949,8 @@ template <> class cpp::numeric_limits<UInt<128>> {
 
 template <> class cpp::numeric_limits<Int<128>> {
 public:
-  LIBC_INLINE static constexpr Int<128> max() {
-    return Int<128>({0xffff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff});
-  }
-  LIBC_INLINE static constexpr Int<128> min() {
-    return Int<128>({0, 0x8000'0000'0000'0000});
-  }
+  LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); }
+  LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); }
   // Meant to match std::numeric_limits interface.
   // NOLINTNEXTLINE(readability-identifier-naming)
   LIBC_INLINE_VAR static constexpr int digits = 128;
@@ -1101,30 +1073,28 @@ has_single_bit(T value) {
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
 countr_zero(const T &value) {
-  return value.ctz();
+  return multiword::countr_zero(value.val);
 }
 
 // Specialization of cpp::countl_zero ('bit.h') for BigInt.
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
 countl_zero(const T &value) {
-  return value.clz();
+  return multiword::countl_zero(value.val);
 }
 
 // Specialization of cpp::countl_one ('bit.h') for BigInt.
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
 countl_one(T value) {
-  // TODO : Implement a faster version not involving operator~.
-  return cpp::countl_zero<T>(~value);
+  return multiword::countl_one(value.val);
 }
 
 // Specialization of cpp::countr_one ('bit.h') for BigInt.
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, int>
 countr_one(T value) {
-  // TODO : Implement a faster version not involving operator~.
-  return cpp::countr_zero<T>(~value);
+  return multiword::countr_one(value.val);
 }
 
 // Specialization of cpp::bit_width ('bit.h') for BigInt.
@@ -1171,53 +1141,49 @@ rotr(T value, int rotate) {
 template <typename T, size_t count>
 LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, T>
 mask_trailing_ones() {
-  static_assert(!T::SIGNED);
-  if (count == 0)
-    return T();
-  constexpr unsigned T_BITS = CHAR_BIT * sizeof(T);
-  static_assert(count <= T_BITS && "Invalid bit index");
-  using word_type = typename T::word_type;
-  T out;
-  constexpr int CHUNK_INDEX_CONTAINING_BIT =
-      static_cast<int>(count / T::WORD_SIZE);
-  int index = 0;
-  for (auto &word : out.val) {
-    if (index < CHUNK_INDEX_CONTAINING_BIT)
-      word = -1;
-    else if (index > CHUNK_INDEX_CONTAINING_BIT)
-      word = 0;
-    else
-      word = mask_trailing_ones<word_type, count % T::WORD_SIZE>();
-    ++index;
-  }
+  static_assert(!T::SIGNED && count <= T::BITS);
+  if (count == T::BITS)
+    return T::all_ones();
+  constexpr size_t QUOTIENT = count / T::WORD_SIZE;
+  constexpr size_t REMAINDER = count % T::WORD_SIZE;
+  T out; // zero initialized
+  for (size_t i = 0; i <= QUOTIENT; ++i)
+    out[i] = i < QUOTIENT
+                 ? -1
+                 : mask_trailing_ones<typename T::word_type, REMAINDER>();
   return out;
 }
 
 // Specialization of mask_leading_ones ('math_extras.h') for BigInt.
 template <typename T, size_t count>
 LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, T> mask_leading_ones() {
-  static_assert(!T::SIGNED);
-  if (count == 0)
-    return T();
-  constexpr unsigned T_BITS = CHAR_BIT * sizeof(T);
-  static_assert(count <= T_BITS && "Invalid bit index");
-  using word_type = typename T::word_type;
-  T out;
-  constexpr int CHUNK_INDEX_CONTAINING_BIT =
-      static_cast<int>((T::BITS - count - 1ULL) / T::WORD_SIZE);
-  int index = 0;
-  for (auto &word : out.val) {
-    if (index < CHUNK_INDEX_CONTAINING_BIT)
-      word = 0;
-    else if (index > CHUNK_INDEX_CONTAINING_BIT)
-      word = -1;
-    else
-      word = mask_leading_ones<word_type, count % T::WORD_SIZE>();
-    ++index;
-  }
+  static_assert(!T::SIGNED && count <= T::BITS);
+  if (count == T::BITS)
+    return T::all_ones();
+  constexpr size_t QUOTIENT = (T::BITS - count - 1U) / T::WORD_SIZE;
+  constexpr size_t REMAINDER = count % T::WORD_SIZE;
+  T out; // zero initialized
+  for (size_t i = QUOTIENT; i < T::WORD_COUNT; ++i)
+    out[i] = i > QUOTIENT
+                 ? -1
+                 : mask_leading_ones<typename T::word_type, REMAINDER>();
   return out;
 }
 
+// Specialization of mask_trailing_zeros ('math_extras.h') for BigInt.
+template <typename T, size_t count>
+LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, T>
+mask_trailing_zeros() {
+  return mask_leading_ones<T, T::BITS - count>();
+}
+
+// Specialization of mask_leading_zeros ('math_extras.h') for BigInt.
+template <typename T, size_t count>
+LIBC_INLINE constexpr cpp::enable_if_t<is_big_int_v<T>, T>
+mask_leading_zeros() {
+  return mask_trailing_ones<T, T::BITS - count>();
+}
+
 } // namespace LIBC_NAMESPACE
 
 #endif // LLVM_LIBC_SRC___SUPPORT_UINT_H
diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h
index 1287c3e9a84fac..4c59cfd99c2e6c 100644
--- a/libc/src/__support/float_to_string.h
+++ b/libc/src/__support/float_to_string.h
@@ -689,7 +689,7 @@ template <> class FloatToString<long double> {
 
       wide_int float_as_int = mantissa;
 
-      float_as_int.shift_left(exponent);
+      float_as_int <<= exponent;
       int_block_index = 0;
 
       while (float_as_int > 0) {
@@ -708,10 +708,11 @@ template <> class FloatToString<long double> {
 
       const int SHIFT_AMOUNT = FLOAT_AS_INT_WIDTH + exponent;
       static_assert(EXTRA_INT_WIDTH >= sizeof(long double) * 8);
-      float_as_fixed.shift_left(SHIFT_AMOUNT);
+      float_as_fixed <<= SHIFT_AMOUNT;
 
       // If there are still digits above the decimal point, handle those.
-      if (float_as_fixed.clz() < static_cast<int>(EXTRA_INT_WIDTH)) {
+      if (cpp::countl_zero(float_as_fixed) <
+          static_cast<int>(EXTRA_INT_WIDTH)) {
         UInt<EXTRA_INT_WIDTH> above_decimal_point =
             float_as_fixed >> FLOAT_AS_INT_WIDTH;
 
diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h
index de1f88fbd3f3f5..984826b4dd9b5b 100644
--- a/libc/src/__support/integer_literals.h
+++ b/libc/src/__support/integer_literals.h
@@ -169,6 +169,15 @@ LIBC_INLINE constexpr auto operator""_u256(const char *x) {
   return internal::parse_with_prefix<UInt<256>>(x);
 }
 
+template <typename T> LIBC_INLINE constexpr T parse_bigint(const char *x) {
+  if (x[0] == '-' || x[0] == '+') {
+    auto positive = internal::parse_with_prefix<T>(x + 1);
+    return x[0] == '-' ? -positive : positive;
+  } else {
+    return internal::parse_with_prefix<T>(x);
+  }
+}
+
 } // namespace LIBC_NAMESPACE
 
 #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H
diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h
index 28ee1be8b99997..dd4ab807dd0367 100644
--- a/libc/src/__support/math_extras.h
+++ b/libc/src/__support/math_extras.h
@@ -12,7 +12,7 @@
 
 #include "src/__support/CPP/bit.h"           // countl_one, countr_zero
 #include "src/__support/CPP/limits.h"        // CHAR_BIT, numeric_limits
-#include "src/__support/CPP/type_traits.h"   // is_unsigned_v
+#include "src/__support/CPP/type_traits.h" // is_unsigned_v, is_constant_evaluated
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"     // LIBC_HAS_BUILTIN
 
@@ -33,199 +33,91 @@ mask_trailing_ones() {
 template <typename T, size_t count>
 LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
 mask_leading_ones() {
-  constexpr T MASK(mask_trailing_ones<T, CHAR_BIT * sizeof(T) - count>());
-  return T(~MASK); // bitwise NOT performs integer promotion.
+  return T(~mask_trailing_ones<T, CHAR_BIT * sizeof(T) - count>());
 }
 
-// Add with carry
-template <typename T> struct SumCarry {
-  T sum;
-  T carry;
-};
-
-// This version is always valid for constexpr.
-template <typename T>
-LIBC_INLINE constexpr cpp::enable_if_t<
-    cpp::is_integral_v<T> && cpp::is_unsigned_v<T>, SumCarry<T>>
-add_with_carry_const(T a, T b, T carry_in) {
-  T tmp = a + carry_in;
-  T sum = b + tmp;
-  T carry_out = (sum < b) + (tmp < a);
-  return {sum, carry_out};
-}
-
-template <typename T>
-LIBC_INLINE constexpr cpp::enable_if_t<
-    cpp::is_integral_v<T> && cpp::is_unsigned_v<T>, SumCarry<T>>
-add_with_carry(T a, T b, T carry_in) {
-  return add_with_carry_const<T>(a, b, carry_in);
-}
-
-#if LIBC_HAS_BUILTIN(__builtin_addc)
-// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins
-
-template <>
-LIBC_INLINE constexpr SumCarry<unsigned char>
-add_with_carry<unsigned char>(unsigned char a, unsigned char b,
-                              unsigned char carry_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return add_with_carry_const<unsigned char>(a, b, carry_in);
-  } else {
-    SumCarry<unsigned char> result{0, 0};
-    result.sum = __builtin_addcb(a, b, carry_in, &result.carry);
-    return result;
-  }
-}
-
-template <>
-LIBC_INLINE constexpr SumCarry<unsigned short>
-add_with_carry<unsigned short>(unsigned short a, unsigned short b,
-                               unsigned short carry_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return add_with_carry_const<unsigned short>(a, b, carry_in);
-  } else {
-    SumCarry<unsigned short> result{0, 0};
-    result.sum = __builtin_addcs(a, b, carry_in, &result.carry);
-    return result;
-  }
-}
-
-template <>
-LIBC_INLINE constexpr SumCarry<unsigned int>
-add_with_carry<unsigned int>(unsigned int a, unsigned int b,
-                             unsigned int carry_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return add_with_carry_const<unsigned int>(a, b, carry_in);
-  } else {
-    SumCarry<unsigned int> result{0, 0};
-    result.sum = __builtin_addc(a, b, carry_in, &result.carry);
-    return result;
-  }
-}
-
-template <>
-LIBC_INLINE constexpr SumCarry<unsigned long>
-add_with_carry<unsigned long>(unsigned long a, unsigned long b,
-                              unsigned long carry_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return add_with_carry_const<unsigned long>(a, b, carry_in);
-  } else {
-    SumCarry<unsigned long> result{0, 0};
-    result.sum = __builtin_addcl(a, b, carry_in, &result.carry);
-    return result;
-  }
+// Create a bitmask with the count right-most bits set to 0, and all other bits
+// set to 1.  Only unsigned types are allowed.
+template <typename T, size_t count>
+LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
+mask_trailing_zeros() {
+  return mask_leading_ones<T, CHAR_BIT * sizeof(T) - count>();
 }
 
-template <>
-LIBC_INLINE constexpr SumCarry<unsigned long long>
-add_with_carry<unsigned long long>(unsigned long long a, unsigned long long b,
-                                   unsigned long long carry_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return add_with_carry_const<unsigned long long>(a, b, carry_in);
-  } else {
-    SumCarry<unsigned long long> result{0, 0};
-    result.sum = __builtin_addcll(a, b, carry_in, &result.carry);
-    return result;
-  }
+// Create a bitmask with the count left-most bits set to 0, and all other bits
+// set to 1.  Only unsigned types are allowed.
+template <typename T, size_t count>
+LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
+mask_leading_zeros() {
+  return mask_trailing_ones<T, CHAR_BIT * sizeof(T) - count>();
 }
 
-#endif // LIBC_HAS_BUILTIN(__builtin_addc)
+#define RETURN_IF_BUILTIN(TYPE, BUILTIN)                                       \
+  if constexpr (cpp::is_same_v<T, TYPE> && LIBC_HAS_BUILTIN(BUILTIN))          \
+    return BUILTIN(a, b, res);
 
-// Subtract with borrow
-template <typename T> struct DiffBorrow {
-  T diff;
-  T borrow;
-};
-
-// This version is always valid for constexpr.
 template <typename T>
-LIBC_INLINE constexpr cpp::enable_if_t<
-    cpp::is_integral_v<T> && cpp::is_unsigned_v<T>, DiffBorrow<T>>
-sub_with_borrow_const(T a, T b, T borrow_in) {
-  T tmp = a - b;
-  T diff = tmp - borrow_in;
-  T borrow_out = (diff > tmp) + (tmp > a);
-  return {diff, borrow_out};
+[[nodiscard]] LIBC_INLINE constexpr bool add_overflow(T a, T b, T *res) {
+  RETURN_IF_BUILTIN(T, __builtin_add_overflow)
+  if (res)
+    *res = a + b;
+  // https://stackoverflow.com/a/1514309
+  return (b > 0 && a > (cpp::numeric_limits<T>::max() - b)) ||
+         (b < 0 && a < (cpp::numeric_limits<T>::min() - b));
 }
 
-// This version is not always valid for constepxr because it's overriden below
-// if builtins are available.
 template <typename T>
-LIBC_INLINE constexpr cpp::enable_if_t<
-    cpp::is_integral_v<T> && cpp::is_unsigned_v<T>, DiffBorrow<T>>
-sub_with_borrow(T a, T b, T borrow_in) {
-  return sub_with_borrow_const<T>(a, b, borrow_in);
-}
-
-#if LIBC_HAS_BUILTIN(__builtin_subc)
-// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins
-
-template <>
-LIBC_INLINE constexpr DiffBorrow<unsigned char>
-sub_with_borrow<unsigned char>(unsigned char a, unsigned char b,
-                               unsigned char borrow_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return sub_with_borrow_const<unsigned char>(a, b, borrow_in);
-  } else {
-    DiffBorrow<unsigned char> result{0, 0};
-    result.diff = __builtin_subcb(a, b, borrow_in, &result.borrow);
-    return result;
-  }
+[[nodiscard]] LIBC_INLINE constexpr bool sub_overflow(T a, T b, T *res) {
+  RETURN_IF_BUILTIN(T, __builtin_sub_overflow)
+  if (res)
+    *res = a - b;
+  // https://stackoverflow.com/a/1514309
+  return (b < 0 && a > (cpp::numeric_limits<T>::max() + b)) ||
+         (b > 0 && a < (cpp::numeric_limits<T>::min() + b));
 }
 
-template <>
-LIBC_INLINE constexpr DiffBorrow<unsigned short>
-sub_with_borrow<unsigned short>(unsigned short a, unsigned short b,
-                                unsigned short borrow_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return sub_with_borrow_const<unsigned short>(a, b, borrow_in);
-  } else {
-    DiffBorrow<unsigned short> result{0, 0};
-    result.diff = __builtin_subcs(a, b, borrow_in, &result.borrow);
-    return result;
-  }
-}
+#undef RETURN_IF_BUILTIN
+#define RETURN_IF_BUILTIN(TYPE, BUILTIN)                                       \
+  if constexpr (cpp::is_same_v<T, TYPE> && LIBC_HAS_BUILTIN(BUILTIN))          \
+    return BUILTIN(a, b, carry_in, carry_out);
 
-template <>
-LIBC_INLINE constexpr DiffBorrow<unsigned int>
-sub_with_borrow<unsigned int>(unsigned int a, unsigned int b,
-                              unsigned int borrow_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return sub_with_borrow_const<unsigned int>(a, b, borrow_in);
-  } else {
-    DiffBorrow<unsigned int> result{0, 0};
-    result.diff = __builtin_subc(a, b, borrow_in, &result.borrow);
-    return result;
-  }
-}
-
-template <>
-LIBC_INLINE constexpr DiffBorrow<unsigned long>
-sub_with_borrow<unsigned long>(unsigned long a, unsigned long b,
-                               unsigned long borrow_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return sub_with_borrow_const<unsigned long>(a, b, borrow_in);
-  } else {
-    DiffBorrow<unsigned long> result{0, 0};
-    result.diff = __builtin_subcl(a, b, borrow_in, &result.borrow);
-    return result;
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
+add_with_carry(T a, T b, T carry_in, T *carry_out = nullptr) {
+  if (carry_out && !cpp::is_constant_evaluated()) { // builtins write *carry_out
+    RETURN_IF_BUILTIN(unsigned char, __builtin_addcb)
+    RETURN_IF_BUILTIN(unsigned short, __builtin_addcs)
+    RETURN_IF_BUILTIN(unsigned int, __builtin_addc)
+    RETURN_IF_BUILTIN(unsigned long, __builtin_addcl)
+    RETURN_IF_BUILTIN(unsigned long long, __builtin_addcll)
   }
+  T sum; // generic path: also used during constant evaluation
+  T carry1 = add_overflow(a, b, &sum);
+  T carry2 = add_overflow(sum, carry_in, &sum);
+  if (carry_out)
+    *carry_out = carry1 | carry2; // at most one of the two adds can wrap
+  return sum; // a + b + carry_in, wrapped to the width of T
 }
 
-template <>
-LIBC_INLINE constexpr DiffBorrow<unsigned long long>
-sub_with_borrow<unsigned long long>(unsigned long long a, unsigned long long b,
-                                    unsigned long long borrow_in) {
-  if (__builtin_is_constant_evaluated()) {
-    return sub_with_borrow_const<unsigned long long>(a, b, borrow_in);
-  } else {
-    DiffBorrow<unsigned long long> result{0, 0};
-    result.diff = __builtin_subcll(a, b, borrow_in, &result.borrow);
-    return result;
+template <typename T>
+[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
+sub_with_borrow(T a, T b, T carry_in, T *carry_out = nullptr) {
+  if (carry_out && !cpp::is_constant_evaluated()) { // builtins write *carry_out
+    RETURN_IF_BUILTIN(unsigned char, __builtin_subcb)
+    RETURN_IF_BUILTIN(unsigned short, __builtin_subcs)
+    RETURN_IF_BUILTIN(unsigned int, __builtin_subc)
+    RETURN_IF_BUILTIN(unsigned long, __builtin_subcl)
+    RETURN_IF_BUILTIN(unsigned long long, __builtin_subcll)
   }
+  T sub; // generic path: also used during constant evaluation
+  T carry1 = sub_overflow(a, b, &sub);
+  T carry2 = sub_overflow(sub, carry_in, &sub);
+  if (carry_out)
+    *carry_out = carry1 | carry2; // borrow out: at most one step can wrap
+  return sub; // a - b - carry_in, wrapped to the width of T
 }
 
-#endif // LIBC_HAS_BUILTIN(__builtin_subc)
+#undef RETURN_IF_BUILTIN
 
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, int>
diff --git a/libc/src/__support/number_pair.h b/libc/src/__support/number_pair.h
index ee6667b1299fe8..2f713fc03520f9 100644
--- a/libc/src/__support/number_pair.h
+++ b/libc/src/__support/number_pair.h
@@ -20,17 +20,6 @@ template <typename T> struct NumberPair {
   T hi = T(0);
 };
 
-template <typename T>
-cpp::enable_if_t<cpp::is_integral_v<T> && cpp::is_unsigned_v<T>,
-                 NumberPair<T>> constexpr split(T a) {
-  constexpr size_t HALF_BIT_WIDTH = sizeof(T) * 4;
-  constexpr T LOWER_HALF_MASK = (T(1) << HALF_BIT_WIDTH) - T(1);
-  NumberPair<T> result;
-  result.lo = a & LOWER_HALF_MASK;
-  result.hi = a >> HALF_BIT_WIDTH;
-  return result;
-}
-
 } // namespace LIBC_NAMESPACE
 
 #endif // LLVM_LIBC_SRC___SUPPORT_NUMBER_PAIR_H
diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp
index e642248881a41c..8b7c9e61fb72b6 100644
--- a/libc/test/src/__support/math_extras_test.cpp
+++ b/libc/test/src/__support/math_extras_test.cpp
@@ -101,4 +101,61 @@ TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) {
     EXPECT_EQ(count_zeros<T>(cpp::numeric_limits<T>::max() >> i), i);
 }
 
+using UnsignedTypes = testing::TypeList<
+#if defined(__SIZEOF_INT128__)
+    __uint128_t,
+#endif
+    unsigned char, unsigned short, unsigned int, unsigned long,
+    unsigned long long>;
+
+TYPED_TEST(LlvmLibcBlockMathExtrasTest, add_overflow, UnsignedTypes) {
+  constexpr T ZERO = cpp::numeric_limits<T>::min();
+  constexpr T ONE(1);
+  constexpr T MAX = cpp::numeric_limits<T>::max();
+  constexpr T BEFORE_MAX = MAX - 1;
+
+  const struct {
+    T lhs;
+    T rhs;
+    T sum;
+    bool carry;
+  } TESTS[] = {
+      {ZERO, ONE, ONE, false},       // 0x00 + 0x01 = 0x01
+      {BEFORE_MAX, ONE, MAX, false}, // 0xFE + 0x01 = 0xFF
+      {MAX, ONE, ZERO, true},        // 0xFF + 0x01 = 0x00 (carry)
+      {MAX, MAX, BEFORE_MAX, true},  // 0xFF + 0xFF = 0xFE (carry)
+  };
+  for (auto tc : TESTS) {
+    T sum;
+    bool carry = add_overflow<T>(tc.lhs, tc.rhs, &sum);
+    EXPECT_EQ(sum, tc.sum);
+    EXPECT_EQ(carry, tc.carry);
+  }
+}
+
+TYPED_TEST(LlvmLibcBlockMathExtrasTest, sub_overflow, UnsignedTypes) {
+  constexpr T ZERO = cpp::numeric_limits<T>::min();
+  constexpr T ONE(1);
+  constexpr T MAX = cpp::numeric_limits<T>::max();
+  constexpr T BEFORE_MAX = MAX - 1;
+
+  const struct {
+    T lhs;
+    T rhs;
+    T sub;
+    bool carry;
+  } TESTS[] = {
+      {ONE, ZERO, ONE, false},      // 0x01 - 0x00 = 0x01
+      {MAX, MAX, ZERO, false},      // 0xFF - 0xFF = 0x00
+      {ZERO, ONE, MAX, true},       // 0x00 - 0x01 = 0xFF (carry)
+      {BEFORE_MAX, MAX, MAX, true}, // 0xFE - 0xFF = 0xFF (carry)
+  };
+  for (auto tc : TESTS) {
+    T sub;
+    bool carry = sub_overflow<T>(tc.lhs, tc.rhs, &sub);
+    EXPECT_EQ(sub, tc.sub);
+    EXPECT_EQ(carry, tc.carry);
+  }
+}
+
 } // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/__support/uint_test.cpp b/libc/test/src/__support/uint_test.cpp
index 5764324ca28815..5a7d906553a4ab 100644
--- a/libc/test/src/__support/uint_test.cpp
+++ b/libc/test/src/__support/uint_test.cpp
@@ -8,6 +8,7 @@
 
 #include "src/__support/CPP/optional.h"
 #include "src/__support/UInt.h"
+#include "src/__support/integer_literals.h"        // parse_unsigned_bigint
 #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128
 
 #include "include/llvm-libc-macros/math-macros.h" // HUGE_VALF, HUGE_VALF
@@ -15,6 +16,190 @@
 
 namespace LIBC_NAMESPACE {
 
+enum Value { ZERO, ONE, TWO, MIN, MAX };
+
+template <typename T> auto create(Value value) {
+  switch (value) {
+  case ZERO:
+    return T(0);
+  case ONE:
+    return T(1);
+  case TWO:
+    return T(2);
+  case MIN:
+    return T::min();
+  case MAX:
+    return T::max();
+  }
+}
+
+using Types = testing::TypeList< //
+#ifdef LIBC_TYPES_HAS_INT64
+    BigInt<64, false, uint64_t>, // 64-bits unsigned (1 x uint64_t)
+    BigInt<64, true, uint64_t>,  // 64-bits   signed (1 x uint64_t)
+#endif
+#ifdef LIBC_TYPES_HAS_INT128
+    BigInt<128, false, __uint128_t>, // 128-bits unsigned (1 x __uint128_t)
+    BigInt<128, true, __uint128_t>,  // 128-bits   signed (1 x __uint128_t)
+#endif
+    BigInt<16, false, uint16_t>, // 16-bits unsigned (1 x uint16_t)
+    BigInt<16, true, uint16_t>,  // 16-bits   signed (1 x uint16_t)
+    BigInt<64, false, uint16_t>, // 64-bits unsigned (4 x uint16_t)
+    BigInt<64, true, uint16_t>   // 64-bits   signed (4 x uint16_t)
+    >;
+
+#define ASSERT_SAME(A, B) ASSERT_TRUE((A) == (B))
+
+TYPED_TEST(LlvmLibcUIntClassTest, Additions, Types) {
+  ASSERT_SAME(create<T>(ZERO) + create<T>(ZERO), create<T>(ZERO));
+  ASSERT_SAME(create<T>(ONE) + create<T>(ZERO), create<T>(ONE));
+  ASSERT_SAME(create<T>(ZERO) + create<T>(ONE), create<T>(ONE));
+  ASSERT_SAME(create<T>(ONE) + create<T>(ONE), create<T>(TWO));
+  // 2's complement addition works for signed and unsigned types.
+  // - unsigned : 0xff + 0x01 = 0x00 (255 + 1 = 0)
+  // -   signed : 0x7f + 0x01 = 0x80 (127 + 1 = -128)
+  ASSERT_SAME(create<T>(MAX) + create<T>(ONE), create<T>(MIN));
+}
+
+TYPED_TEST(LlvmLibcUIntClassTest, Subtraction, Types) {
+  ASSERT_SAME(create<T>(ZERO) - create<T>(ZERO), create<T>(ZERO));
+  ASSERT_SAME(create<T>(ONE) - create<T>(ONE), create<T>(ZERO));
+  ASSERT_SAME(create<T>(ONE) - create<T>(ZERO), create<T>(ONE));
+  // 2's complement subtraction works for signed and unsigned types.
+  // - unsigned : 0x00 - 0x01 = 0xff (   0 - 1 = 255)
+  // -   signed : 0x80 - 0x01 = 0x7f (-128 - 1 = 127)
+  ASSERT_SAME(create<T>(MIN) - create<T>(ONE), create<T>(MAX));
+}
+
+TYPED_TEST(LlvmLibcUIntClassTest, Multiplication, Types) {
+  ASSERT_SAME(create<T>(ZERO) * create<T>(ZERO), create<T>(ZERO));
+  ASSERT_SAME(create<T>(ZERO) * create<T>(ONE), create<T>(ZERO));
+  ASSERT_SAME(create<T>(ONE) * create<T>(ZERO), create<T>(ZERO));
+  ASSERT_SAME(create<T>(ONE) * create<T>(ONE), create<T>(ONE));
+  ASSERT_SAME(create<T>(ONE) * create<T>(TWO), create<T>(TWO));
+  ASSERT_SAME(create<T>(TWO) * create<T>(ONE), create<T>(TWO));
+  // - unsigned : 0xff x 0xff = 0x01 (mod 0x100)
+  // -   signed : 0x7f x 0x7f = 0x01 (mod 0x100)
+  ASSERT_SAME(create<T>(MAX) * create<T>(MAX), create<T>(ONE));
+}
+
+template <typename T> void print(const char *msg, T value) {
+  testing::tlog << msg;
+  IntegerToString<T, radix::Hex> buffer(value);
+  testing::tlog << buffer.view() << "\n";
+}
+
+TEST(LlvmLibcUIntClassTest, SignedAddSub) {
+  // Computations performed by https://www.wolframalpha.com/
+  using T = BigInt<128, true, uint32_t>;
+  const T a = parse_bigint<T>("1927508279017230597");
+  const T b = parse_bigint<T>("278789278723478925");
+  const T s = parse_bigint<T>("2206297557740709522");
+  // Addition
+  ASSERT_SAME(a + b, s);
+  ASSERT_SAME(b + a, s); // commutative
+  // Subtraction
+  ASSERT_SAME(a - s, -b);
+  ASSERT_SAME(s - a, b);
+}
+
+TEST(LlvmLibcUIntClassTest, SignedMulDiv) {
+  // Computations performed by https://www.wolframalpha.com/
+  using T = BigInt<128, true, uint16_t>;
+  struct {
+    const char *a;
+    const char *b;
+    const char *mul;
+  } const test_cases[] = {{"-4", "3", "-12"},
+                          {"-3", "-3", "9"},
+                          {"1927508279017230597", "278789278723478925",
+                           "537368642840747885329125014794668225"}};
+  for (auto tc : test_cases) {
+    const T a = parse_bigint<T>(tc.a);
+    const T b = parse_bigint<T>(tc.b);
+    const T mul = parse_bigint<T>(tc.mul);
+    // Multiplication
+    ASSERT_SAME(a * b, mul);
+    ASSERT_SAME(b * a, mul);   // commutative
+    ASSERT_SAME(a * -b, -mul); // sign
+    ASSERT_SAME(-a * b, -mul); // sign
+    ASSERT_SAME(-a * -b, mul); // sign
+    // Division
+    ASSERT_SAME(mul / a, b);
+    ASSERT_SAME(mul / b, a);
+    ASSERT_SAME(-mul / a, -b); // sign
+    ASSERT_SAME(mul / -a, -b); // sign
+    ASSERT_SAME(-mul / -a, b); // sign
+  }
+}
+
+TYPED_TEST(LlvmLibcUIntClassTest, Division, Types) {
+  ASSERT_SAME(create<T>(ZERO) / create<T>(ONE), create<T>(ZERO));
+  ASSERT_SAME(create<T>(MAX) / create<T>(ONE), create<T>(MAX));
+  ASSERT_SAME(create<T>(MAX) / create<T>(MAX), create<T>(ONE));
+  // - unsigned : 0xff / 0x02 = 0x7f
+  // -   signed : 0x7f / 0x02 = 0x3f
+  ASSERT_SAME(create<T>(MAX) / create<T>(TWO), (create<T>(MAX) >> 1));
+
+  using word_type = typename T::word_type;
+  const T zero_one_repeated = T::all_ones() / T(0xff);
+  const word_type pattern = word_type(~0) / word_type(0xff);
+  for (const word_type part : zero_one_repeated.val) {
+    if constexpr (T::SIGNED == false) {
+      EXPECT_EQ(part, pattern);
+    }
+  }
+}
+
+TYPED_TEST(LlvmLibcUIntClassTest, is_neg, Types) {
+  EXPECT_FALSE(create<T>(ZERO).is_neg());
+  EXPECT_FALSE(create<T>(ONE).is_neg());
+  EXPECT_FALSE(create<T>(TWO).is_neg());
+  EXPECT_EQ(create<T>(MIN).is_neg(), T::SIGNED);
+  EXPECT_FALSE(create<T>(MAX).is_neg());
+}
+
+TYPED_TEST(LlvmLibcUIntClassTest, Masks, Types) {
+  if constexpr (!T::SIGNED) {
+    constexpr size_t BITS = T::BITS;
+    // mask_trailing_ones
+    ASSERT_SAME((mask_trailing_ones<T, 0>()), T::zero());
+    ASSERT_SAME((mask_trailing_ones<T, 1>()), T::one());
+    ASSERT_SAME((mask_trailing_ones<T, BITS - 1>()), T::all_ones() >> 1);
+    ASSERT_SAME((mask_trailing_ones<T, BITS>()), T::all_ones());
+    // mask_leading_ones
+    ASSERT_SAME((mask_leading_ones<T, 0>()), T::zero());
+    ASSERT_SAME((mask_leading_ones<T, 1>()), T::one() << (BITS - 1));
+    ASSERT_SAME((mask_leading_ones<T, BITS - 1>()), T::all_ones() - T::one());
+    ASSERT_SAME((mask_leading_ones<T, BITS>()), T::all_ones());
+    // mask_trailing_zeros
+    ASSERT_SAME((mask_trailing_zeros<T, 0>()), T::all_ones());
+    ASSERT_SAME((mask_trailing_zeros<T, 1>()), T::all_ones() - T::one());
+    ASSERT_SAME((mask_trailing_zeros<T, BITS - 1>()), T::one() << (BITS - 1));
+    ASSERT_SAME((mask_trailing_zeros<T, BITS>()), T::zero());
+    // mask_leading_zeros
+    ASSERT_SAME((mask_leading_zeros<T, 0>()), T::all_ones());
+    ASSERT_SAME((mask_leading_zeros<T, 1>()), T::all_ones() >> 1);
+    ASSERT_SAME((mask_leading_zeros<T, BITS - 1>()), T::one());
+    ASSERT_SAME((mask_leading_zeros<T, BITS>()), T::zero());
+  }
+}
+
+TYPED_TEST(LlvmLibcUIntClassTest, CountBits, Types) {
+  if constexpr (!T::SIGNED) {
+    for (size_t i = 0; i <= T::BITS; ++i) {
+      const auto l_one = T::all_ones() << i; // 0b111...000
+      const auto r_one = T::all_ones() >> i; // 0b000...111
+      const int zeros = i;
+      const int ones = T::BITS - zeros;
+      ASSERT_EQ(cpp::countr_one(r_one), ones);
+      ASSERT_EQ(cpp::countl_one(l_one), ones);
+      ASSERT_EQ(cpp::countr_zero(l_one), zeros);
+      ASSERT_EQ(cpp::countl_zero(r_one), zeros);
+    }
+  }
+}
+
 using LL_UInt64 = UInt<64>;
 // We want to test UInt<128> explicitly. So, for
 // convenience, we use a sugar which does not conflict with the UInt128 type
@@ -561,7 +746,7 @@ TEST(LlvmLibcUIntClassTest, FullMulTests) {
     LL_UInt##Bits a = ~LL_UInt##Bits(0);                                       \
     LL_UInt##Bits hi = a.quick_mul_hi(a);                                      \
     LL_UInt##Bits trunc = static_cast<LL_UInt##Bits>(a.ful_mul(a) >> Bits);    \
-    uint64_t overflow = trunc.sub(hi);                                         \
+    uint64_t overflow = trunc.sub_overflow(hi);                                \
     EXPECT_EQ(overflow, uint64_t(0));                                          \
     EXPECT_LE(uint64_t(trunc), uint64_t(Error));                               \
   } while (0)
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
index 4f976122967c4d..c0d402a89ea3ce 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel
@@ -87,6 +87,7 @@ libc_test(
     srcs = ["uint_test.cpp"],
     deps = [
         "//libc:__support_cpp_optional",
+        "//libc:__support_integer_literals",
         "//libc:__support_macros_properties_types",
         "//libc:__support_uint",
         "//libc:llvm_libc_macros_math_macros",

>From c24e4b12348a5e775e094819f99c6e35a2977076 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Sat, 23 Mar 2024 07:04:00 +0000
Subject: [PATCH 2/6] Add fuzzing and fix signed right shift operator

---
 libc/fuzzing/CMakeLists.txt           |  1 +
 libc/fuzzing/__support/CMakeLists.txt |  7 +++
 libc/fuzzing/__support/uint_fuzz.cpp  | 70 +++++++++++++++++++++++++++
 libc/src/__support/UInt.h             | 21 ++++++--
 4 files changed, 94 insertions(+), 5 deletions(-)
 create mode 100644 libc/fuzzing/__support/CMakeLists.txt
 create mode 100644 libc/fuzzing/__support/uint_fuzz.cpp

diff --git a/libc/fuzzing/CMakeLists.txt b/libc/fuzzing/CMakeLists.txt
index 82487688af1162..816691b4bd4403 100644
--- a/libc/fuzzing/CMakeLists.txt
+++ b/libc/fuzzing/CMakeLists.txt
@@ -1,6 +1,7 @@
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer")
 add_custom_target(libc-fuzzer)
 
+add_subdirectory(__support)
 # TODO(#85680): Re-enable math fuzzing after headers are sorted out
 # add_subdirectory(math)
 add_subdirectory(stdlib)
diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt
new file mode 100644
index 00000000000000..278e914e3fbe95
--- /dev/null
+++ b/libc/fuzzing/__support/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_libc_fuzzer(
+  uint_fuzz
+  SRCS
+    uint_fuzz.cpp
+  DEPENDS
+    libc.src.__support.uint
+)
diff --git a/libc/fuzzing/__support/uint_fuzz.cpp b/libc/fuzzing/__support/uint_fuzz.cpp
new file mode 100644
index 00000000000000..f48f00d3b4ba11
--- /dev/null
+++ b/libc/fuzzing/__support/uint_fuzz.cpp
@@ -0,0 +1,70 @@
+#include "src/__support/CPP/bit.h"
+#include "src/__support/UInt.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+
+using namespace LIBC_NAMESPACE;
+
+// Helper function when using gdb / lldb to set a breakpoint and inspect values.
+template <typename T> void debug_and_trap(const char *msg, T a, T b) {
+  __builtin_trap();
+}
+
+#define DEBUG_AND_TRAP()
+
+#define TEST_BINOP(OP)                                                         \
+  if ((a OP b) != (static_cast<T>(BigInt(a) OP BigInt(b))))                    \
+    debug_and_trap(#OP, a, b);
+
+#define TEST_SHIFTOP(OP)                                                       \
+  if ((a OP b) != (static_cast<T>(BigInt(a) OP b)))                            \
+    debug_and_trap(#OP, a, b);
+
+#define TEST_FUNCTION(FUN)                                                     \
+  if (FUN(a) != FUN(BigInt(a)))                                                \
+    debug_and_trap(#FUN, a, b);
+
+// Test that basic arithmetic operations of BigInt behave like their scalar
+// counterparts.
+template <typename T, typename BigInt> void run_tests(T a, T b) {
+  TEST_BINOP(+)
+  TEST_BINOP(-)
+  TEST_BINOP(*)
+  if (b != 0)
+    TEST_BINOP(/)
+  if (b >= 0 && b < cpp::numeric_limits<T>::digits) {
+    TEST_SHIFTOP(<<)
+    TEST_SHIFTOP(>>)
+  }
+  if constexpr (!BigInt::SIGNED) {
+    TEST_FUNCTION(cpp::has_single_bit)
+    TEST_FUNCTION(cpp::countr_zero)
+    TEST_FUNCTION(cpp::countl_zero)
+    TEST_FUNCTION(cpp::countl_one)
+    TEST_FUNCTION(cpp::countr_one)
+  }
+}
+
+// Reads a T from libfuzzer data.
+template <typename T> T read(const uint8_t *data, size_t &remainder) {
+  T out = 0;
+  constexpr size_t T_SIZE = sizeof(T);
+  const size_t copy_size = remainder < T_SIZE ? remainder : T_SIZE;
+  inline_memcpy(&out, data, copy_size);
+  remainder -= copy_size;
+  return out;
+}
+
+template <typename T, typename BigInt>
+void run_tests(const uint8_t *data, size_t size) {
+  const auto a = read<T>(data, size);
+  const auto b = read<T>(data, size);
+  run_tests<T, BigInt>(a, b);
+}
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+  // unsigned
+  run_tests<uint64_t, BigInt<64, false, uint16_t>>(data, size);
+  // signed
+  run_tests<int64_t, BigInt<64, true, uint16_t>>(data, size);
+  return 0;
+}
diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index 96e346915721e7..427a6fa5d7bfef 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -235,11 +235,17 @@ LIBC_INLINE constexpr void quick_mul_hi(cpp::array<word, N> &dst,
   dst.back() = acc.carry();
 }
 
+template <typename word, size_t N>
+LIBC_INLINE constexpr bool is_negative(cpp::array<word, N> &array) {
+  using signed_word = cpp::make_signed_t<word>;
+  return cpp::bit_cast<signed_word>(array.back()) < 0;
+}
+
 // An enum for the shift function below.
 enum Direction { LEFT, RIGHT };
 
 // A bitwise shift on an array of elements.
-template <Direction direction, typename word, size_t N>
+template <Direction direction, bool is_signed, typename word, size_t N>
 LIBC_INLINE constexpr void shift(cpp::array<word, N> &array, size_t offset) {
   constexpr size_t WORD_BITS = cpp::numeric_limits<word>::digits;
   constexpr size_t TOTAL_BITS = N * WORD_BITS;
@@ -249,6 +255,7 @@ LIBC_INLINE constexpr void shift(cpp::array<word, N> &array, size_t offset) {
     array = {};
     return;
   }
+  const bool is_neg = is_signed && is_negative(array);
   const auto at = [&](size_t index) -> int {
     // reverse iteration when direction == LEFT.
     if constexpr (direction == LEFT)
@@ -256,9 +263,13 @@ LIBC_INLINE constexpr void shift(cpp::array<word, N> &array, size_t offset) {
     return int(index);
   };
   const auto safe_get_at = [&](size_t index) -> word {
-    // return 0 when accessing out of bound elements.
+    // return appropriate value when accessing out of bound elements.
     const int i = at(index);
-    return i >= 0 && i < int(N) ? array[i] : 0;
+    if (i < 0)
+      return 0;
+    if (i >= int(N))
+      return is_neg ? -1 : 0;
+    return array[i];
   };
   const size_t index_offset = offset / WORD_BITS;
   const size_t bit_offset = offset % WORD_BITS;
@@ -736,7 +747,7 @@ struct BigInt {
   }
 
   LIBC_INLINE constexpr BigInt &operator<<=(size_t s) {
-    multiword::shift<multiword::LEFT>(val, s);
+    multiword::shift<multiword::LEFT, SIGNED>(val, s);
     return *this;
   }
 
@@ -747,7 +758,7 @@ struct BigInt {
   }
 
   LIBC_INLINE constexpr BigInt &operator>>=(size_t s) {
-    multiword::shift<multiword::RIGHT>(val, s);
+    multiword::shift<multiword::RIGHT, SIGNED>(val, s);
     return *this;
   }
 

>From 268d4935a29374f206724890cd9e9c34a52b6fd9 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Mon, 25 Mar 2024 10:11:45 +0000
Subject: [PATCH 3/6] Add documentation, fix imprecise comments, use a single
 macro instead of two.

---
 libc/src/__support/UInt.h        |  2 +-
 libc/src/__support/math_extras.h | 53 +++++++++++++++++++-------------
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index 427a6fa5d7bfef..f5fe51761f120f 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -70,7 +70,7 @@ template <typename T> LIBC_INLINE constexpr half_width_t<T> hi(T value) {
   return hi(split(value));
 }
 
-// Returns 'a' times 'b' in a Double<word>. Cannot overflow by definition.
+// Returns 'a' times 'b' in a Double<word>. Cannot overflow by construction.
 template <typename word>
 LIBC_INLINE constexpr Double<word> mul2(word a, word b) {
   if constexpr (cpp::is_same_v<word, uint8_t>) {
diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h
index dd4ab807dd0367..bb287a19a794ea 100644
--- a/libc/src/__support/math_extras.h
+++ b/libc/src/__support/math_extras.h
@@ -10,8 +10,8 @@
 #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H
 #define LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H
 
-#include "src/__support/CPP/bit.h"           // countl_one, countr_zero
-#include "src/__support/CPP/limits.h"        // CHAR_BIT, numeric_limits
+#include "src/__support/CPP/bit.h"         // countl_one, countr_zero
+#include "src/__support/CPP/limits.h"      // CHAR_BIT, numeric_limits
 #include "src/__support/CPP/type_traits.h" // is_unsigned_v, is_constant_evaluated
 #include "src/__support/macros/attributes.h" // LIBC_INLINE
 #include "src/__support/macros/config.h"     // LIBC_HAS_BUILTIN
@@ -52,13 +52,16 @@ mask_leading_zeros() {
   return mask_trailing_ones<T, CHAR_BIT * sizeof(T) - count>();
 }
 
-#define RETURN_IF_BUILTIN(TYPE, BUILTIN)                                       \
+#define RETURN_IF_BUILTIN(TYPE, BUILTIN, ...)                                  \
   if constexpr (cpp::is_same_v<T, TYPE> && LIBC_HAS_BUILTIN(BUILTIN))          \
-    return BUILTIN(a, b, res);
+    return BUILTIN(__VA_ARGS__);
 
+// Returns whether 'a + b' overflows.
+// The result is stored in 'res' if not 'nullptr', dropped otherwise.
+// We keep the pass by pointer interface for consistency with the intrinsic.
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr bool add_overflow(T a, T b, T *res) {
-  RETURN_IF_BUILTIN(T, __builtin_add_overflow)
+  RETURN_IF_BUILTIN(T, __builtin_add_overflow, a, b, res)
   if (res)
     *res = a + b;
   // https://stackoverflow.com/a/1514309
@@ -66,9 +69,12 @@ template <typename T>
          (b < 0 && a < (cpp::numeric_limits<T>::min() - b));
 }
 
+// Returns whether 'a - b' overflows.
+// The result is stored in 'res' if not 'nullptr', dropped otherwise.
+// We keep the pass by pointer interface for consistency with the intrinsic.
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr bool sub_overflow(T a, T b, T *res) {
-  RETURN_IF_BUILTIN(T, __builtin_sub_overflow)
+  RETURN_IF_BUILTIN(T, __builtin_sub_overflow, a, b, res)
   if (res)
     *res = a - b;
   // https://stackoverflow.com/a/1514309
@@ -76,20 +82,20 @@ template <typename T>
          (b > 0 && a < (cpp::numeric_limits<T>::min() + b));
 }
 
-#undef RETURN_IF_BUILTIN
-#define RETURN_IF_BUILTIN(TYPE, BUILTIN)                                       \
-  if constexpr (cpp::is_same_v<T, TYPE> && LIBC_HAS_BUILTIN(BUILTIN))          \
-    return BUILTIN(a, b, carry_in, carry_out);
-
+// Returns the result of 'a + b' taking into account 'carry_in'.
+// The carry out is stored in 'carry_out' if not 'nullptr', dropped otherwise.
+// We keep the pass by pointer interface for consistency with the intrinsic.
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
 add_with_carry(T a, T b, T carry_in, T *carry_out = nullptr) {
   if constexpr (!cpp::is_constant_evaluated()) {
-    RETURN_IF_BUILTIN(unsigned char, __builtin_addcb)
-    RETURN_IF_BUILTIN(unsigned short, __builtin_addcs)
-    RETURN_IF_BUILTIN(unsigned int, __builtin_addc)
-    RETURN_IF_BUILTIN(unsigned long, __builtin_addcl)
-    RETURN_IF_BUILTIN(unsigned long long, __builtin_addcll)
+    RETURN_IF_BUILTIN(unsigned char, __builtin_addcb, a, b, carry_in, carry_out)
+    RETURN_IF_BUILTIN(unsigned short, __builtin_addcs, a, b, carry_in,
+                      carry_out)
+    RETURN_IF_BUILTIN(unsigned int, __builtin_addc, a, b, carry_in, carry_out)
+    RETURN_IF_BUILTIN(unsigned long, __builtin_addcl, a, b, carry_in, carry_out)
+    RETURN_IF_BUILTIN(unsigned long long, __builtin_addcll, a, b, carry_in,
+                      carry_out)
   }
   T sum;
   T carry1 = add_overflow(a, b, &sum);
@@ -99,15 +105,20 @@ add_with_carry(T a, T b, T carry_in, T *carry_out = nullptr) {
   return sum;
 }
 
+// Returns the result of 'a - b' taking into account 'carry_in'.
+// The carry out is stored in 'carry_out' if not 'nullptr', dropped otherwise.
+// We keep the pass by pointer interface for consistency with the intrinsic.
 template <typename T>
 [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t<cpp::is_unsigned_v<T>, T>
 sub_with_borrow(T a, T b, T carry_in, T *carry_out = nullptr) {
   if constexpr (!cpp::is_constant_evaluated()) {
-    RETURN_IF_BUILTIN(unsigned char, __builtin_subcb)
-    RETURN_IF_BUILTIN(unsigned short, __builtin_subcs)
-    RETURN_IF_BUILTIN(unsigned int, __builtin_subc)
-    RETURN_IF_BUILTIN(unsigned long, __builtin_subcl)
-    RETURN_IF_BUILTIN(unsigned long long, __builtin_subcll)
+    RETURN_IF_BUILTIN(unsigned char, __builtin_subcb, a, b, carry_in, carry_out)
+    RETURN_IF_BUILTIN(unsigned short, __builtin_subcs, a, b, carry_in,
+                      carry_out)
+    RETURN_IF_BUILTIN(unsigned int, __builtin_subc, a, b, carry_in, carry_out)
+    RETURN_IF_BUILTIN(unsigned long, __builtin_subcl, a, b, carry_in, carry_out)
+    RETURN_IF_BUILTIN(unsigned long long, __builtin_subcll, a, b, carry_in,
+                      carry_out)
   }
   T sub;
   T carry1 = sub_overflow(a, b, &sub);

>From 7c03fb7a5f7777c5aa939607f9966472f202e465 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Mon, 25 Mar 2024 10:32:34 +0000
Subject: [PATCH 4/6] Use `mul_add` instead of `mad`

---
 libc/src/__support/UInt.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index f5fe51761f120f..e858f0df5b6314 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -163,8 +163,8 @@ LIBC_INLINE constexpr word sub_with_borrow(cpp::array<word, N> &dst,
 // Inplace multiply-add. Returns carry.
 // i.e., 'dst += b x c'
 template <typename word, size_t N>
-LIBC_INLINE constexpr word mad_with_carry(cpp::array<word, N> &dst, word b,
-                                          word c) {
+LIBC_INLINE constexpr word mul_add_with_carry(cpp::array<word, N> &dst, word b,
+                                              word c) {
   return add_with_carry(dst, mul2(b, c));
 }
 
@@ -189,7 +189,7 @@ LIBC_INLINE constexpr word scalar_multiply_with_carry(cpp::array<word, N> &dst,
                                                       word x) {
   Accumulator<word> acc;
   for (auto &val : dst) {
-    const word carry = mad_with_carry(acc, val, x);
+    const word carry = mul_add_with_carry(acc, val, x);
     val = acc.advance(carry);
   }
   return acc.carry();
@@ -210,7 +210,7 @@ LIBC_INLINE constexpr word multiply_with_carry(cpp::array<word, O> &dst,
     const size_t upper_idx = i < M ? i : M - 1;
     word carry = 0;
     for (size_t j = lower_idx; j <= upper_idx; ++j)
-      carry += mad_with_carry(acc, lhs[j], rhs[i - j]);
+      carry += mul_add_with_carry(acc, lhs[j], rhs[i - j]);
     dst[i] = acc.advance(carry);
   }
   return acc.carry();
@@ -224,12 +224,12 @@ LIBC_INLINE constexpr void quick_mul_hi(cpp::array<word, N> &dst,
   word carry = 0;
   // First round of accumulation for those at N - 1 in the full product.
   for (size_t i = 0; i < N; ++i)
-    carry += mad_with_carry(acc, lhs[i], rhs[N - 1 - i]);
+    carry += mul_add_with_carry(acc, lhs[i], rhs[N - 1 - i]);
   for (size_t i = N; i < 2 * N - 1; ++i) {
     acc.advance(carry);
     carry = 0;
     for (size_t j = i - N + 1; j < N; ++j)
-      carry += mad_with_carry(acc, lhs[j], rhs[i - j]);
+      carry += mul_add_with_carry(acc, lhs[j], rhs[i - j]);
     dst[i - N] = acc.sum();
   }
   dst.back() = acc.carry();

>From f1ab5ec92b655564cea81976372f09871e855d83 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Mon, 25 Mar 2024 10:33:56 +0000
Subject: [PATCH 5/6] Fix comments

---
 libc/src/__support/UInt.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index e858f0df5b6314..31cc3299f23721 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -127,7 +127,7 @@ LIBC_INLINE constexpr Double<word> mul2(word a, word b) {
   }
 }
 
-// Inplace binary operation with carry propagation. Returns carry.
+// In-place 'dst op= rhs' with carry propagation. Returns carry.
 template <typename Function, typename word, size_t N, size_t M>
 LIBC_INLINE constexpr word inplace_binop(Function op_with_carry,
                                          cpp::array<word, N> &dst,
@@ -146,22 +146,22 @@ LIBC_INLINE constexpr word inplace_binop(Function op_with_carry,
   return carry_out;
 }
 
-// Inplace addition. Returns carry.
+// In-place addition. Returns carry.
 template <typename word, size_t N, size_t M>
 LIBC_INLINE constexpr word add_with_carry(cpp::array<word, N> &dst,
                                           const cpp::array<word, M> &rhs) {
   return inplace_binop(LIBC_NAMESPACE::add_with_carry<word>, dst, rhs);
 }
 
-// Inplace subtraction. Returns borrow.
+// In-place subtraction. Returns borrow.
 template <typename word, size_t N, size_t M>
 LIBC_INLINE constexpr word sub_with_borrow(cpp::array<word, N> &dst,
                                            const cpp::array<word, M> &rhs) {
   return inplace_binop(LIBC_NAMESPACE::sub_with_borrow<word>, dst, rhs);
 }
 
-// Inplace multiply-add. Returns carry.
-// i.e., 'dst += b x c'
+// In-place multiply-add. Returns carry.
+// i.e., 'dst += b * c'
 template <typename word, size_t N>
 LIBC_INLINE constexpr word mul_add_with_carry(cpp::array<word, N> &dst, word b,
                                               word c) {
@@ -183,7 +183,7 @@ template <typename T> struct Accumulator final : cpp::array<T, 2> {
   LIBC_INLINE constexpr T carry() const { return UP::back(); }
 };
 
-// Inplace multiplication by a single word. Returns carry.
+// In-place multiplication by a single word. Returns carry.
 template <typename word, size_t N>
 LIBC_INLINE constexpr word scalar_multiply_with_carry(cpp::array<word, N> &dst,
                                                       word x) {

>From 60b37004bfeab35af89075269fad8e9bdeaa483b Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet at google.com>
Date: Mon, 25 Mar 2024 11:00:20 +0000
Subject: [PATCH 6/6] Remove unneeded local function

---
 libc/src/__support/UInt.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h
index 31cc3299f23721..4b8ce85209da16 100644
--- a/libc/src/__support/UInt.h
+++ b/libc/src/__support/UInt.h
@@ -116,13 +116,12 @@ LIBC_INLINE constexpr Double<word> mul2(word a, word b) {
     const word step4 = b_hi * a_hi; // no overflow;
     word lo_digit = step1;
     word hi_digit = step4;
-    const word zero_carry = 0;
+    const word no_carry = 0;
     word carry;
-    const auto add_with_carry = LIBC_NAMESPACE::add_with_carry<word>;
-    lo_digit = add_with_carry(lo_digit, shiftl(step2), zero_carry, &carry);
-    hi_digit = add_with_carry(hi_digit, shiftr(step2), carry, nullptr);
-    lo_digit = add_with_carry(lo_digit, shiftl(step3), zero_carry, &carry);
-    hi_digit = add_with_carry(hi_digit, shiftr(step3), carry, nullptr);
+    lo_digit = add_with_carry<word>(lo_digit, shiftl(step2), no_carry, &carry);
+    hi_digit = add_with_carry<word>(hi_digit, shiftr(step2), carry);
+    lo_digit = add_with_carry<word>(lo_digit, shiftl(step3), no_carry, &carry);
+    hi_digit = add_with_carry<word>(hi_digit, shiftr(step3), carry);
     return Double<word>(lo_digit, hi_digit);
   }
 }