[libc-commits] [libc] [libc] Alternative algorithm for decimal FP printf (PR #123643)
via libc-commits
libc-commits at lists.llvm.org
Wed Jan 29 06:53:12 PST 2025
================
@@ -464,6 +567,96 @@ LIBC_INLINE constexpr DyadicFloat<Bits> quick_mul(const DyadicFloat<Bits> &a,
return result;
}
+// Correctly rounded multiplication of 2 dyadic floats, assuming the
+// exponent remains within range.
+template <size_t Bits>
+LIBC_INLINE constexpr DyadicFloat<Bits>
+rounded_mul(const DyadicFloat<Bits> &a, const DyadicFloat<Bits> &b) {
+ using DblMant = LIBC_NAMESPACE::UInt<(2 * Bits)>;
+ Sign result_sign = (a.sign != b.sign) ? Sign::NEG : Sign::POS;
+ int result_exponent = a.exponent + b.exponent + static_cast<int>(Bits);
+ auto product = DblMant(a.mantissa) * DblMant(b.mantissa);
+ // As in quick_mul(), renormalize by 1 bit manually rather than countl_zero
+ if (product.get_bit(2 * Bits - 1) == 0) {
+ product <<= 1;
+ result_exponent -= 1;
+ }
+
+ return DyadicFloat<Bits>::round(result_sign, result_exponent, product, Bits);
+}
+
+// Approximate reciprocal - given a nonzero a, make a good approximation to 1/a.
+// The method is Newton-Raphson iteration, based on quick_mul.
+template <size_t Bits, typename = cpp::enable_if_t<(Bits >= 32)>>
+LIBC_INLINE constexpr DyadicFloat<Bits>
+approx_reciprocal(const DyadicFloat<Bits> &a) {
+ // Given an approximation x to 1/a, a better one is x' = x(2-ax).
+ //
+ // You can derive this by using the Newton-Raphson formula with the function
+ // f(x) = 1/x - a. But another way to see that it works is to say: suppose
+ // that ax = 1-e for some small error e. Then ax' = ax(2-ax) = (1-e)(1+e) =
+ // 1-e^2. So the error in x' is the square of the error in x, i.e. the number
+ // of correct bits in x' is double the number in x.
+
+ // An initial approximation to the reciprocal
+ DyadicFloat<Bits> x(Sign::POS, -32 - a.exponent - Bits,
+ uint64_t(0xFFFFFFFFFFFFFFFF) /
+ static_cast<uint64_t>(a.mantissa >> (Bits - 32)));
+
+ // The constant 2, which we'll need in every iteration
+ DyadicFloat<Bits> two(Sign::POS, 1, 1);
+
+ // We expect at least 31 correct bits from our 32-bit starting approximation
+ size_t ok_bits = 31;
+
+ // The number of good bits doubles in each iteration, except that rounding
+ // errors introduce a little extra each time. Subtract a bit from our
+ // accuracy assessment to account for that.
+ while (ok_bits < Bits) {
+ x = quick_mul(x, quick_sub(two, quick_mul(a, x)));
+ ok_bits = 2 * ok_bits - 1;
+ }
+
+ return x;
+}
+
+// Correctly rounded division of 2 dyadic floats, assuming the
+// exponent remains within range.
+template <size_t Bits>
+LIBC_INLINE constexpr DyadicFloat<Bits>
+rounded_div(const DyadicFloat<Bits> &af, const DyadicFloat<Bits> &bf) {
+ using DblMant = LIBC_NAMESPACE::UInt<(Bits * 2 + 64)>;
+
+ // Make an approximation to the quotient as a * (1/b). Both the
+ // multiplication and the reciprocal are a bit sloppy, which doesn't
+ // matter, because we're going to correct for that below.
+ auto qf = fputil::quick_mul(af, fputil::approx_reciprocal(bf));
+
+ // Switch to BigInt and stop using quick_add and quick_mul: now
+ // we're working in exact integers so as to get the true remainder.
+ DblMant a = af.mantissa, b = bf.mantissa, q = qf.mantissa;
+ q <<= 2; // leave room for a round bit, even if exponent decreases
+ a <<= af.exponent - bf.exponent - qf.exponent + 2;
+ DblMant qb = q * b;
+ if (qb < a) {
+ DblMant too_small = a - b;
+ while (qb <= too_small) {
+ qb += b;
+ ++q;
+ }
+ } else {
+ while (qb > a) {
+ qb -= b;
+ --q;
+ }
+ }
+
+ DyadicFloat<(Bits * 2)> qbig(qf.sign, qf.exponent - 2, q);
+ auto qfinal = DyadicFloat<Bits>::round(qbig.sign, qbig.exponent + Bits,
----------------
lntue wrote:
nit: just `return DyadicFloat<Bits>::round...`
https://github.com/llvm/llvm-project/pull/123643
More information about the libc-commits
mailing list