[libc-commits] [libc] [libc][stdfix] Implement fxdivi functions (rdivi) (PR #154914)
Shreeyash Pandey via libc-commits
libc-commits at lists.llvm.org
Sat Sep 27 06:06:41 PDT 2025
================
@@ -224,6 +225,87 @@ idiv(T x, T y) {
return static_cast<XType>(result);
}
+LIBC_INLINE long accum nrstep(long accum d, long accum x0) {
+ auto v = x0 * (2.lk - (d * x0));
+ return v;
+}
+
+// Divide the two integers and return a fixed_point value
+//
+// For reference, see:
+// https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division
+// https://stackoverflow.com/a/9231996
+
+template <typename XType> LIBC_INLINE constexpr XType divi(int n, int d) {
+ // If the value of the second operand of the / operator is zero, the
+ // behavior is undefined. Ref: ISO/IEC TR 18037:2008(E) p.g. 16
+ LIBC_CRASH_ON_VALUE(d, 0);
+
+ if (LIBC_UNLIKELY(n == 0)) {
+ return FXRep<XType>::ZERO();
+ }
+ bool result_is_negative = ((n < 0) != (d < 0));
+
+ int64_t n64 = static_cast<int64_t>(n);
+ int64_t d64 = static_cast<int64_t>(d);
+
+ uint64_t nv = static_cast<uint64_t>(n64 < 0 ? -n64 : n64);
+ uint64_t dv = static_cast<uint64_t>(d64 < 0 ? -d64 : d64);
+
+ if (d == INT_MIN) {
+ nv <<= 1;
+ dv >>= 1;
+ }
+
+ uint32_t clz = cpp::countl_zero<uint32_t>(static_cast<uint32_t>(dv)) - 1;
+ uint64_t scaled_val = dv << clz;
+ // Scale denominator to be in the range of [0.5,1]
+ FXBits<long accum> d_scaled{scaled_val};
+ uint64_t scaled_val_n = nv << clz;
+ // Scale the numerator as much as the denominator to maintain correctness of
+ // the original equation
+ FXBits<long accum> n_scaled{scaled_val_n};
+ long accum n_scaled_val = n_scaled.get_val();
+ long accum d_scaled_val = d_scaled.get_val();
+ // x0 = (48/17) - (32/17) * d_n
+ long accum a = 0x2.d89d89d8p0lk; // 48/17 = 2.8235294...
+ long accum b = 0x1.e1e1e1e1p0lk; // 32/17 = 1.8823529...
+ // Error of the initial approximation, as derived
+ // from the wikipedia article is
+ // E0 = 1/17 = 0.059 (5.9%)
+ long accum initial_approx = a - (b * d_scaled_val);
+ // Since, 0.5 <= d_scaled_val <= 1.0, 0.9412 <= initial_approx <= 1.88235
+ LIBC_ASSERT((initial_approx >= 0x0.78793dd9p0lk) &&
+ (initial_approx <= 0x1.f0f0d845p0lk));
+ // Each newton-raphson iteration will square the error, due
+ // to quadratic convergence. So,
+ // E1 = (0.059)^2 = 0.0034
+ long accum val = nrstep(d_scaled_val, initial_approx);
+ // E2 = 0.0000121
+ val = nrstep(d_scaled_val, val);
+ // E3 = 1.468e−10
+ val = nrstep(d_scaled_val, val);
+
----------------
bojle wrote:
Doing this, I end up losing precision for finite result tests. For example, `128/256` results in 0.499969.
https://github.com/llvm/llvm-project/pull/154914
More information about the libc-commits
mailing list