[libc-commits] [libc] [libc] Alternative algorithm for decimal FP printf (PR #123643)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Mon Jan 27 14:08:21 PST 2025
================
@@ -0,0 +1,669 @@
+//===-- Decimal Float Converter for printf (320-bit float) ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an alternative to the Ryū printf algorithm in
+// float_dec_converter.h. Instead of generating output digits 9 at a time on
+// demand, in this implementation, a float is converted to decimal by computing
+// just one power of 10 and multiplying/dividing the entire input by it,
+// generating the whole string of decimal output digits in one go.
+//
+// This avoids the large constant lookup table of Ryū, making it more suitable
+// for low-memory embedded contexts; but it's also faster than the fallback
+// version of Ryū which computes table entries on demand using DyadicFloat,
+// because those must calculate a potentially large power of 10 per 9-digit
+// output block, whereas this computes just one, which does the whole job.
+//
+// The calculation is done in 320-bit DyadicFloat, which provides enough
+// precision to generate 39 correct digits of output from any floating-point
+// size up to and including 128-bit long double, because the rounding errors in
+// computing the largest necessary power of 10 are still smaller than the
+// distance (in the 320-bit float format) between adjacent 39-decimal-digit
+// outputs.
+//
+// No further digits beyond the 39th are generated: if the printf format string
+// asks for more precision than that, the answer is padded with 0s. This is a
+// permitted option in IEEE 754-2019 (section 5.12.2): you're allowed to define
+// a limit H on the number of decimal digits you can generate, and pad with 0s
+// if asked for more than that, subject to the constraint that H must be
+// consistent across all float formats you support (you can't use a smaller H
+// for single precision than double or long double), and must be large enough
+// that even in the largest supported precision the only numbers misrounded are
+// ones extremely close to a rounding boundary. 39 digits is the smallest
+// permitted value for an implementation supporting binary128.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FLOAT_DEC_CONVERTER_LIMITED_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FLOAT_DEC_CONVERTER_LIMITED_H
+
+#include "src/__support/CPP/algorithm.h"
+#include "src/__support/CPP/string.h"
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/dyadic_float.h"
+#include "src/__support/FPUtil/rounding_mode.h"
+#include "src/__support/integer_to_string.h"
+#include "src/__support/macros/config.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/float_inf_nan_converter.h"
+#include "src/stdio/printf_core/writer.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace printf_core {
+
+enum class ConversionType { E, F, G };
+using StorageType = fputil::FPBits<long double>::StorageType;
+
+constexpr unsigned MAX_DIGITS = 39;
+constexpr size_t DF_BITS = 320;
+constexpr char DECIMAL_POINT = '.';
+
+struct DigitsInput {
+ // Input mantissa, stored with the explicit leading 1 bit (if any) at the
+ // top. So either it has a value in the range [2^127,2^128) representing a
+ // real number in [1,2), or it has the value 0, representing 0.
+ UInt128 mantissa;
+
+ // Input exponent, as a power of 2 to multiply into mantissa.
+ int exponent;
+
+ // Input sign.
+ Sign sign;
+
+  // Constructor which accepts a mantissa directly from a floating-point format,
+ // and shifts it up to the top of the UInt128 so that a function consuming
+ // this struct afterwards doesn't have to remember which format it came from.
+ DigitsInput(int32_t fraction_len, StorageType mantissa_, int exponent_,
+ Sign sign)
+ : mantissa(UInt128(mantissa_) << (127 - fraction_len)),
+ exponent(exponent_), sign(sign) {
+ if (!(mantissa & (UInt128(1) << 127)) && mantissa != 0) {
+ // Normalize a denormalized input.
+ int shift = cpp::countl_zero(mantissa);
+ mantissa <<= shift;
+ exponent -= shift;
+ }
+ }
+};
+
+struct DigitsOutput {
+ // Output from decimal_digits().
+ //
+ // `digits` is a buffer containing nothing but ASCII digits. Even if the
+ // decimal point needs to appear somewhere in the final output string, it
+ // isn't represented in _this_ string; the client of this object will insert
+ // it in an appropriate place. `ndigits` gives the buffer size.
+ //
+ // `exponent` represents the exponent you would display if the decimal point
+  // comes after the first digit of `digits`, e.g. if digits == "1234"
+ // and exponent = 3 then this represents 1.234e3, or just the integer 1234.
+ size_t ndigits;
+ int exponent;
+ char digits[MAX_DIGITS + 1];
+};
+
+// Calculate the actual digits of a decimal representation of an FP number.
+//
+// If `e_mode` is true, then `precision` indicates the desired number of output
+// decimal digits. On return, the output `digits` string has length exactly
+// `precision` and starts with a nonzero digit; the output `exponent` is
+// filled in to indicate the exponent as shown above.
+//
+// If `e_mode` is false, then `precision` indicates the desired number of
+// digits after the decimal point. On return, the last digit in the output
+// `digits` string has a place value of _at least_ 10^-precision. But also, at
+// most `MAX_DIGITS` digits are returned, so the caller may need to pad it at
+// the end with the appropriate number of extra 0s.
+LIBC_INLINE
+DigitsOutput decimal_digits(DigitsInput input, int precision, bool e_mode) {
+ if (input.mantissa == 0) {
+ // Special-case zero, by manually generating the right number of zero
+ // digits and setting an appropriate exponent.
+ DigitsOutput output;
+ if (!e_mode) {
+ // In F mode, it's enough to return an empty string of digits. That's the
+ // same thing we do when given a nonzero number that rounds down to 0.
+ output.ndigits = 0;
+ output.exponent = -precision - 1;
+ } else {
+ // In E mode, generate a string containing the expected number of 0s.
+ __builtin_memset(output.digits, '0', precision);
+ output.ndigits = precision;
+ output.exponent = 0;
+ }
+ return output;
+ }
+
+ // Estimate log10 of the input value, by multiplying its binary exponent by
+ // 1292913986/2^32. That is a rounded-down approximation to log10(2),
+ // accurate enough that for any binary exponent in the range of float128 it
+ // will give the correct value of floor(log10(2^n)).
+ //
+ // This estimate is correct for deciding how many decimal digits we end up
+ // producing, unless a power of 10 falls in the interval corresponding to
+ // this binary exponent, in which case there might be one more decimal digit
+ // for larger mantissas. To detect this, we do the same computation for the
+ // next exponent up.
+ int log10_input_min = ((input.exponent - 1) * 1292913986LL) >> 32;
+ int log10_input_max = (input.exponent * 1292913986LL) >> 32;
----------------
michaelrj-google wrote:
nit: move the log10 approximation into a helper function
https://github.com/llvm/llvm-project/pull/123643
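
For readers skimming the thread, here is a minimal sketch of the kind of helper the review is asking for (the name and exact shape are hypothetical, not taken from the patch), built around the same fixed-point constant used in the quoted hunk:

// Approximate floor(log10(2^exponent)) by multiplying the binary exponent
// by 1292913986 / 2^32, a rounded-down fixed-point approximation to
// log10(2). Per the comment in the patch, this gives the exact value of
// floor(log10(2^n)) for every binary exponent in the float128 range. Like
// the original expression, it relies on arithmetic right shift of negative
// values.
constexpr int approx_floor_log10_pow2(int exponent) {
  return static_cast<int>((exponent * 1292913986LL) >> 32);
}

// Sanity checks: 2^10 = 1024 has four digits, so floor(log10(2^10)) == 3,
// and 2^-10 is roughly 0.00098, so floor(log10(2^-10)) == -4.
static_assert(approx_floor_log10_pow2(10) == 3, "positive exponent");
static_assert(approx_floor_log10_pow2(-10) == -4, "negative exponent");

With such a helper, the two estimates in the quoted hunk would read approx_floor_log10_pow2(input.exponent - 1) and approx_floor_log10_pow2(input.exponent).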