[libc-commits] [libc] [libc][stdfix] Implement fxdivi functions (rdivi) (PR #154914)

Sat Sep 27 06:06:41 PDT 2025

================
@@ -224,6 +225,87 @@ idiv(T x, T y) {
   return static_cast<XType>(result);
 }
 
+LIBC_INLINE long accum nrstep(long accum d, long accum x0) {
+  auto v = x0 * (2.lk - (d * x0));
+  return v;
+}
+
+// Divide the two integers and return a fixed_point value
+//
+// For reference, see:
+// https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division
+// https://stackoverflow.com/a/9231996
+
+template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
+  // If the value of the second operand of the / operator is zero, the
+  // behavior is undefined. Ref: ISO/IEC TR 18037:2008(E) p.g. 16
+  LIBC_CRASH_ON_VALUE(d, 0);
+
+  if (LIBC_UNLIKELY(n == 0)) {
+    return FXRep<XType>::ZERO();
+  }
+  bool result_is_negative = ((n < 0) != (d < 0));
+
+  int64_t n64 = static_cast<int64_t>(n);
+  int64_t d64 = static_cast<int64_t>(d);
+
+  uint64_t nv = static_cast<uint64_t>(n64 < 0 ? -n64 : n64);
+  uint64_t dv = static_cast<uint64_t>(d64 < 0 ? -d64 : d64);
+
+  if (d == INT_MIN) {
+    nv <<= 1;
+    dv >>= 1;
+  }
+
+  uint32_t clz = cpp::countl_zero<uint32_t>(static_cast<uint32_t>(dv)) - 1;
+  uint64_t scaled_val = dv << clz;
+  // Scale denominator to be in the range of [0.5,1]
+  FXBits<long accum> d_scaled{scaled_val};
+  uint64_t scaled_val_n = nv << clz;
+  // Scale the numerator as much as the denominator to maintain correctness of
+  // the original equation
+  FXBits<long accum> n_scaled{scaled_val_n};
+  long accum n_scaled_val = n_scaled.get_val();
+  long accum d_scaled_val = d_scaled.get_val();
+  // x0 = (48/17) - (32/17) * d_n
+  long accum a = 0x2.d89d89d8p0lk; // 48/17 = 2.8235294...
+  long accum b = 0x1.e1e1e1e1p0lk; // 32/17 = 1.8823529...
+  // Error of the initial approximation, as derived
+  // from the wikipedia article is
+  //  E0 = 1/17 = 0.059 (5.9%)
+  long accum initial_approx = a - (b * d_scaled_val);
+  // Since, 0.5 <= d_scaled_val <= 1.0, 0.9412 <= initial_approx <= 1.88235
+  LIBC_ASSERT((initial_approx >= 0x0.78793dd9p0lk) &&
+              (initial_approx <= 0x1.f0f0d845p0lk));
+  // Each newton-raphson iteration will square the error, due
+  // to quadratic convergence. So,
+  // E1 = (0.059)^2 = 0.0034
+  long accum val = nrstep(d_scaled_val, initial_approx);
+  // E2 = 0.0000121
+  val = nrstep(d_scaled_val, val);
+  // E3 = 1.468e−10
+  val = nrstep(d_scaled_val, val);
+
----------------
bojle wrote:

Doing this, I end up losing precision for finite result tests. For example, `128/256` results in 0.499969. 

https://github.com/llvm/llvm-project/pull/154914