[libc-commits] [libc] [llvm] [libc] Add fixed point support to printf (PR #82707)

Thu Feb 22 15:44:50 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-libc

Author: Michael Jones (michaelrj-google)

<details>
<summary>Changes</summary>

This patch adds the r, R, k, and K conversion specifiers to printf, with
accompanying tests. They are guarded behind the
LIBC_COPT_PRINTF_DISABLE_FIXED_POINT flag as well as automatic fixed
point support detection.



---

Patch is 34.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/82707.diff


12 Files Affected:

- (modified) libc/docs/dev/printf_behavior.rst (+7) 
- (modified) libc/src/stdio/printf_core/CMakeLists.txt (+1) 
- (modified) libc/src/stdio/printf_core/converter.cpp (+8) 
- (modified) libc/src/stdio/printf_core/converter_atlas.h (+5) 
- (modified) libc/src/stdio/printf_core/converter_utils.h (+3) 
- (modified) libc/src/stdio/printf_core/core_structs.h (+19-4) 
- (added) libc/src/stdio/printf_core/fixed_converter.h (+386) 
- (modified) libc/src/stdio/printf_core/float_dec_converter.h (-3) 
- (modified) libc/src/stdio/printf_core/parser.h (+65) 
- (modified) libc/src/stdio/printf_core/printf_config.h (+7) 
- (modified) libc/test/src/stdio/sprintf_test.cpp (+160) 
- (modified) utils/bazel/llvm-project-overlay/libc/BUILD.bazel (+9-7) 


``````````diff

diff --git a/libc/docs/dev/printf_behavior.rst b/libc/docs/dev/printf_behavior.rst
index bc60aa43ee2b6b..7786b27a9fdcd5 100644
--- a/libc/docs/dev/printf_behavior.rst
+++ b/libc/docs/dev/printf_behavior.rst
@@ -62,6 +62,13 @@ When set, this flag disables support for floating point numbers and all their
 conversions (%a, %f, %e, %g); any floating point number conversion will be
 treated as invalid. This reduces code size.
 
+LIBC_COPT_PRINTF_DISABLE_FIXED_POINT
+------------------------------------
+When set, this flag disables support for fixed point numbers and all their
+conversions (%r, %k); any fixed point number conversion will be treated as
+invalid. This reduces code size. This has no effect if the current compiler does
+not support fixed point numbers.
+
 LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS
 ----------------------------------
 When set, this flag disables the nullptr checks in %n and %s.
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 8da274395526df..cd75060db37579 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -76,6 +76,7 @@ add_object_library(
     float_inf_nan_converter.h
     float_hex_converter.h
     float_dec_converter.h
+    fixed_converter.h #TODO: Check if this should be disabled when fixed unavail
   DEPENDS
     .writer
     .core_structs
diff --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
index 52412aef3c5c15..613d693c3cfcb3 100644
--- a/libc/src/stdio/printf_core/converter.cpp
+++ b/libc/src/stdio/printf_core/converter.cpp
@@ -9,6 +9,7 @@
 #include "src/stdio/printf_core/converter.h"
 
 #include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/printf_config.h"
 #include "src/stdio/printf_core/writer.h"
 
 // This option allows for replacing all of the conversion functions with custom
@@ -75,6 +76,13 @@ int convert(Writer *writer, const FormatSection &to_conv) {
   case 'G':
     return convert_float_dec_auto(writer, to_conv);
 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
+#ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
+  case 'r':
+  case 'R':
+  case 'k':
+  case 'K':
+    return convert_fixed(writer, to_conv);
+#endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
   case 'n':
     return convert_write_int(writer, to_conv);
diff --git a/libc/src/stdio/printf_core/converter_atlas.h b/libc/src/stdio/printf_core/converter_atlas.h
index 6471f3f2955b75..2189ed11a551ea 100644
--- a/libc/src/stdio/printf_core/converter_atlas.h
+++ b/libc/src/stdio/printf_core/converter_atlas.h
@@ -31,6 +31,11 @@
 #include "src/stdio/printf_core/float_hex_converter.h"
 #endif // LIBC_COPT_PRINTF_DISABLE_FLOAT
 
+#ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
+// defines convert_fixed
+#include "src/stdio/printf_core/fixed_converter.h"
+#endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
+
 #ifndef LIBC_COPT_PRINTF_DISABLE_WRITE_INT
 #include "src/stdio/printf_core/write_int_converter.h"
 #endif // LIBC_COPT_PRINTF_DISABLE_WRITE_INT
diff --git a/libc/src/stdio/printf_core/converter_utils.h b/libc/src/stdio/printf_core/converter_utils.h
index 54f0a870d0ac4a..948fe816e9b76d 100644
--- a/libc/src/stdio/printf_core/converter_utils.h
+++ b/libc/src/stdio/printf_core/converter_utils.h
@@ -51,6 +51,9 @@ LIBC_INLINE uintmax_t apply_length_modifier(uintmax_t num, LengthModifier lm) {
       return result;                                                           \
   }
 
+// This is used to represent which direction the number should be rounded.
+enum class RoundDirection { Up, Down, Even };
+
 } // namespace printf_core
 } // namespace LIBC_NAMESPACE
 
diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index 7634d45568ab84..681f85dd5a285d 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -10,7 +10,9 @@
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H
 
 #include "src/__support/CPP/string_view.h"
+#include "src/__support/CPP/type_traits.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/stdio/printf_core/printf_config.h"
 
 #include <inttypes.h>
 #include <stddef.h>
@@ -77,7 +79,13 @@ struct FormatSection {
   }
 };
 
-enum PrimaryType : uint8_t { Unknown = 0, Float = 1, Pointer = 2, Integer = 3 };
+enum PrimaryType : uint8_t {
+  Unknown = 0,
+  Float = 1,
+  Pointer = 2,
+  Integer = 3,
+  FixedPoint = 4,
+};
 
 // TypeDesc stores the information about a type that is relevant to printf in
 // a relatively compact manner.
@@ -95,9 +103,16 @@ template <typename T> LIBC_INLINE constexpr TypeDesc type_desc_from_type() {
   } else {
     constexpr bool IS_POINTER = cpp::is_pointer_v<T>;
     constexpr bool IS_FLOAT = cpp::is_floating_point_v<T>;
-    return TypeDesc{sizeof(T), IS_POINTER ? PrimaryType::Pointer
-                               : IS_FLOAT ? PrimaryType::Float
-                                          : PrimaryType::Integer};
+#ifdef LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
+    constexpr bool IS_FIXED_POINT = cpp::is_fixed_point_v<T>;
+#else
+    constexpr bool IS_FIXED_POINT = false;
+#endif // LIBC_INTERNAL_PRINTF_HAS_FIXED_POINT
+
+    return TypeDesc{sizeof(T), IS_POINTER       ? PrimaryType::Pointer
+                               : IS_FLOAT       ? PrimaryType::Float
+                               : IS_FIXED_POINT ? PrimaryType::FixedPoint
+                                                : PrimaryType::Integer};
   }
 }
 
diff --git a/libc/src/stdio/printf_core/fixed_converter.h b/libc/src/stdio/printf_core/fixed_converter.h
new file mode 100644
index 00000000000000..5d521f2957a560
--- /dev/null
+++ b/libc/src/stdio/printf_core/fixed_converter.h
@@ -0,0 +1,386 @@
+//===-- Fixed Point Converter for printf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FIXED_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_FIXED_CONVERTER_H
+
+#include "include/llvm-libc-macros/stdfix-macros.h"
+#include "src/__support/CPP/string_view.h"
+#include "src/__support/fixed_point/fx_bits.h"
+#include "src/__support/fixed_point/fx_rep.h"
+#include "src/__support/integer_to_string.h"
+#include "src/__support/libc_assert.h"
+#include "src/stdio/printf_core/converter_utils.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE {
+namespace printf_core {
+
+// This is just for assertions. It will be compiled out for release builds.
+LIBC_INLINE constexpr uint32_t const_ten_exp(uint32_t exponent) {
+  uint32_t result = 1;
+  LIBC_ASSERT(exponent < 11);
+  for (size_t i = 0; i < exponent; ++i)
+    result *= 10;
+
+  return result;
+}
+
+LIBC_INLINE int convert_fixed(Writer *writer, const FormatSection &to_conv) {
+  using SAStorageType = fixed_point::FXRep<short accum>::StorageType;
+  using AStorageType = fixed_point::FXRep<accum>::StorageType;
+  using LAStorageType = fixed_point::FXRep<long accum>::StorageType;
+  using SFStorageType = fixed_point::FXRep<short fract>::StorageType;
+  using FStorageType = fixed_point::FXRep<fract>::StorageType;
+  using LFStorageType = fixed_point::FXRep<long fract>::StorageType;
+
+  // Long accum should be the largest type, so we can store all the smaller
+  // numbers in things sized for it.
+  using LARep = fixed_point::FXRep<unsigned long accum>;
+  using StorageType = LARep::StorageType;
+
+  // All of the letters will be defined relative to variable a, which will be
+  // the appropriate case based on the name of the conversion. This converts any
+  // conversion name into the letter 'a' with the appropriate case.
+  const char a = (to_conv.conv_name & 32) | 'A';
+  FormatFlags flags = to_conv.flags;
+
+  bool is_negative;
+  int exponent;
+  StorageType integral;
+  StorageType fractional;
+
+  // TODO: See about simplifying this mess of a 3D matrix if statement.
+  // r = fract
+  // k = accum
+  // lowercase = signed
+  // uppercase = unsigned
+  // h = short
+  // l = long
+  // any other length modifier has no effect
+  if (to_conv.length_modifier == LengthModifier::h) {
+    // short types
+    if (to_conv.conv_name == 'r') {
+      auto fixed_bits = fixed_point::FXBits<short fract>(
+          static_cast<SFStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'R') {
+      auto fixed_bits = fixed_point::FXBits<unsigned short fract>(
+          static_cast<SFStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'k') {
+      auto fixed_bits = fixed_point::FXBits<short accum>(
+          static_cast<SAStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'K') {
+      auto fixed_bits = fixed_point::FXBits<unsigned short accum>(
+          static_cast<SAStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+  } else if (to_conv.length_modifier == LengthModifier::l) {
+    // long types
+    if (to_conv.conv_name == 'r') {
+      auto fixed_bits = fixed_point::FXBits<long fract>(
+          static_cast<LFStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'R') {
+      auto fixed_bits = fixed_point::FXBits<unsigned long fract>(
+          static_cast<LFStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'k') {
+      auto fixed_bits = fixed_point::FXBits<long accum>(
+          static_cast<LAStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'K') {
+      auto fixed_bits = fixed_point::FXBits<unsigned long accum>(
+          static_cast<LAStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+  } else {
+    // unspecified types
+    if (to_conv.conv_name == 'r') {
+      auto fixed_bits = fixed_point::FXBits<fract>(
+          static_cast<FStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'R') {
+      auto fixed_bits = fixed_point::FXBits<unsigned fract>(
+          static_cast<FStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'k') {
+      auto fixed_bits = fixed_point::FXBits<accum>(
+          static_cast<AStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+    if (to_conv.conv_name == 'K') {
+      auto fixed_bits = fixed_point::FXBits<unsigned accum>(
+          static_cast<AStorageType>(to_conv.conv_val_raw));
+      integral = fixed_bits.get_integral();
+      fractional = fixed_bits.get_fraction();
+      exponent = fixed_bits.get_exponent();
+      is_negative = fixed_bits.get_sign();
+    }
+  }
+
+  LIBC_ASSERT(static_cast<size_t>(exponent) <=
+                  (sizeof(StorageType) - sizeof(uint32_t)) * CHAR_BIT &&
+              "StorageType must be large enough to hold the fractional "
+              "component multiplied by a 32 bit number.");
+
+  char sign_char = 0;
+
+  // Check if the conv name is uppercase
+  if (a == 'A') {
+    // These flags are only for signed conversions, so this removes them if the
+    // conversion is unsigned.
+    flags = FormatFlags(flags &
+                        ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
+  }
+
+  if (is_negative)
+    sign_char = '-';
+  else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN)
+    sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX
+  else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
+    sign_char = ' ';
+
+  // If to_conv doesn't specify a precision, the precision defaults to 6.
+  const size_t precision = to_conv.precision < 0 ? 6 : to_conv.precision;
+  bool has_decimal_point =
+      (precision > 0) || ((flags & FormatFlags::ALTERNATE_FORM) != 0);
+
+  // The number of non-zero digits below the decimal point for a negative power
+  // of 2 in base 10 is equal to the magnitude of the power of 2.
+
+  // A quick proof:
+  // Let p be any positive integer.
+  // Let e = 2^(-p)
+  // Let t be a positive integer such that e * 10^t is an integer.
+  // By definition: The smallest allowed value of t must be equal to the number
+  // of non-zero digits below the decimal point in e.
+  // If we evaluate e * 10^t we get the following:
+  // e * 10^t = 2^(-p) * 10*t = 2^(-p) * 2^t * 5^t = 5^t * 2^(t-p)
+  // For 5^t * 2^(t-p) to be an integer, both exponents must be non-negative,
+  // since 5 and 2 are coprime.
+  // The smallest value of t such that t-p is non-negative is p.
+  // Therefor, the number of non-zero digits below the decimal point for a given
+  // negative power of 2 "p" is equal to the value of p.
+
+  constexpr size_t MAX_FRACTION_DIGITS = LARep::FRACTION_LEN;
+
+  char fraction_digits[MAX_FRACTION_DIGITS];
+
+  size_t valid_fraction_digits = 0;
+
+  // TODO: Factor this part out
+  while (fractional > 0) {
+    uint32_t cur_digits = 0;
+    // 10^9 is used since it's the largest power of 10 that fits in a uint32_t
+    constexpr uint32_t TEN_EXP_NINE = 1000000000;
+    constexpr size_t DIGITS_PER_BLOCK = 9;
+
+    // Multiply by 10^9, then grab the digits above the decimal point, then
+    // clear those digits in fractional.
+    fractional = fractional * TEN_EXP_NINE;
+    cur_digits = static_cast<uint32_t>(fractional >> exponent);
+    fractional = fractional % (StorageType(1) << exponent);
+
+    // we add TEN_EXP_NINE to force leading zeroes to show up, then we skip the
+    // first digit in the loop.
+    const IntegerToString<uint32_t> cur_fractional_digits(cur_digits +
+                                                          TEN_EXP_NINE);
+    for (size_t i = 0;
+         i < DIGITS_PER_BLOCK && valid_fraction_digits < MAX_FRACTION_DIGITS;
+         ++i, ++valid_fraction_digits)
+      fraction_digits[valid_fraction_digits] =
+          cur_fractional_digits.view()[i + 1];
+
+    if (valid_fraction_digits >= MAX_FRACTION_DIGITS) {
+      LIBC_ASSERT(fractional == 0 && "If the fraction digit buffer is full, "
+                                     "there should be no remaining digits.");
+      /*
+        A visual explanation of what this assert is checking:
+
+         32 digits (max for 32 bit fract)
+         +------------------------------++--+--- must be zero
+         |                              ||  |
+         123456789012345678901234567890120000
+         |       ||       ||       ||       |
+         +-------++-------++-------++-------+
+         9 digit blocks
+      */
+      LIBC_ASSERT(cur_digits % const_ten_exp(
+                                   DIGITS_PER_BLOCK -
+                                   (MAX_FRACTION_DIGITS % DIGITS_PER_BLOCK)) ==
+                      0 &&
+                  "Digits after the MAX_FRACTION_DIGITS should all be zero.");
+      valid_fraction_digits = MAX_FRACTION_DIGITS;
+    }
+  }
+
+  if (precision < valid_fraction_digits) {
+    // Handle rounding. Just do round to nearest, tie to even since it's
+    // unspecified.
+    RoundDirection round;
+    char first_digit_after = fraction_digits[precision];
+    if (first_digit_after > '5') {
+      round = RoundDirection::Up;
+    } else if (first_digit_after < '5') {
+      round = RoundDirection::Down;
+    } else {
+      // first_digit_after == '5'
+      // need to check the remaining digits, but default to even.
+      round = RoundDirection::Even;
+      for (size_t cur_digit_index = precision + 1;
+           cur_digit_index + 1 < valid_fraction_digits; ++cur_digit_index) {
+        if (fraction_digits[cur_digit_index] != '0') {
+          round = RoundDirection::Up;
+          break;
+        }
+      }
+    }
+
+    // If we need to actually perform rounding, do so.
+    if (round == RoundDirection::Up || round == RoundDirection::Even) {
+      bool keep_rounding = true;
+      int digit_to_round = static_cast<int>(precision) - 1;
+      for (; digit_to_round >= 0 && keep_rounding; --digit_to_round) {
+        keep_rounding = false;
+        char cur_digit = fraction_digits[digit_to_round];
+        // if the digit should not be rounded up
+        if (round == RoundDirection::Even && ((cur_digit - '0') % 2) == 0) {
+          // break out of the loop
+          break;
+        }
+        fraction_digits[digit_to_round] += 1;
+
+        // if the digit was a 9, instead replace with a 0.
+        if (cur_digit == '9') {
+          fraction_digits[digit_to_round] = '0';
+          keep_rounding = true;
+        }
+      }
+
+      // if every digit below the decimal point was rounded up but we need to
+      // keep rounding
+      if (keep_rounding &&
+          (round == RoundDirection::Up ||
+           (round == RoundDirection::Even && ((integral % 2) == 1)))) {
+        // add one to the integral portion to round it up.
+        ++integral;
+      }
+    }
+
+    valid_fraction_digits = precision;
+  }
+
+  const IntegerToString<StorageType> integral_str(integral);
+
+  // these are signed to prevent underflow due to negative values. The
+  // eventual values will always be non-negative.
+  size_t trailing_zeroes = 0;
+  int padding;
+
+  // If the precision is greater than the actual result, pad with 0s
+  if (precision > valid_fraction_digits)
+    trailing_zeroes = precision - (valid_fraction_digits);
+
+  constexpr cpp::string_view DECIMAL_POINT(".");
+
+  padding = static_cast<int>(to_conv.min_width - (sign_char > 0 ? 1 : 0) -
+                             integral_str.size() -
+                             static_cast<int>(has_decimal_point) -
+                             valid_fraction_digits - trailing_zeroes);
+  if (padding < 0)
+    padding = 0;
+
+  if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
+    // The pattern is (sign), integral, (.), (fraction), (zeroes), (spaces)
+    if (sign_char > 0)
+      RET_IF_RESULT_NEGATIVE(writer->write(sign_char));
+    RET_IF_RESULT_NEGATIVE(writer->write(integral_str.view()));
+    if (has_decimal_point)
+      RET_IF_RESULT_NEGATIVE(writer->write(DECIMAL_POINT));
+    if (valid_fraction_digits > 0)
+      RET_IF_RESULT_NEGATIVE(
+          writer->write({fraction_digits, valid_fraction_digits}));
+    if (trailing_zeroes > 0)
+      RET_IF_RESULT_NEGATIVE(writer->write('0', trailing_zeroes));
+    if (padding > 0)
+      RET_IF_RESULT_NEGATIVE(...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/82707