[libc-commits] [libc] [libc][stdfix] Implement fxdivi functions (rdivi) (PR #154914)

Fri Sep 12 08:12:25 PDT 2025

================
@@ -224,6 +224,80 @@ idiv(T x, T y) {
   return static_cast<XType>(result);
 }
 
+LIBC_INLINE long accum nrstep(long accum d, long accum x0) {
+  auto v = x0 * (2.lk - (d * x0));
+  return v;
+}
+
+/* Divide the two integers and return a fixed_point value
+ *
+ * For reference, see:
+ * https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division
+ * https://stackoverflow.com/a/9231996
+ */
+template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
+  // If the value of the second operand of the / operator is zero, the
+  // behavior is undefined. Ref: ISO/IEC TR 18037:2008(E) p.g. 16
+  LIBC_CRASH_ON_VALUE(d, 0);
+
+  if (LIBC_UNLIKELY(n == 0)) {
+    return FXRep<XType>::ZERO();
+  }
+  bool result_is_negative = (n < 0) ^ (d < 0);
+
+  unsigned int nv = static_cast<unsigned int>(n < 0 ? -n : n);
+  unsigned int dv = static_cast<unsigned int>(d < 0 ? -d : d);
+  unsigned int clz = cpp::countl_zero<unsigned int>(dv) - 1;
+  unsigned long int scaled_val = dv << clz;
+  /* Scale denominator to be in the range of [0.5,1] */
+  FXBits<long accum> d_scaled{scaled_val};
+  unsigned long int scaled_val_n = nv << clz;
+  /* Scale the numerator as much as the denominator to maintain correctness of
+   * the original equation
+   */
+  FXBits<long accum> n_scaled{scaled_val_n};
+  long accum n_scaled_val = n_scaled.get_val();
+  long accum d_scaled_val = d_scaled.get_val();
+  /* x0 = (48/17) - (32/17) * d_n */
+  long accum a = 2.8235lk; /* 48/17 */
+  long accum b = 1.8823lk; /* 32/17 */
+  /* Error of the initial approximation, as derived
+   * from the wikipedia article is
+   *  E0 = 1/17 = 0.059 (5.9%)
+   */
+  long accum initial_approx = a - (b * d_scaled_val);
+  /* Each newton-raphson iteration will square the error, due
+   * to quadratic convergence. So,
+   *  E1 = (0.059)^2 = 0.0034
+   */
+  long accum val = nrstep(d_scaled_val, initial_approx);
+  /* E2 = 0.0000121 */
+  val = nrstep(d_scaled_val, val);
+  /* E3 = 1.468e−10 */
----------------
lntue wrote:

`E3 < 2^-32` already.  Any extra Newton-Raphson step won't make it more accurate.

https://github.com/llvm/llvm-project/pull/154914