[libc-commits] [libc] 1be3669 - [libc] add printf base 10 integer conversion

Thu Jun 9 10:27:14 PDT 2022

Author: Michael Jones
Date: 2022-06-09T10:27:11-07:00
New Revision: 1be3669dda4d3c6a4aa3a36c2599f20a0a00c7ed

URL: https://github.com/llvm/llvm-project/commit/1be3669dda4d3c6a4aa3a36c2599f20a0a00c7ed
DIFF: https://github.com/llvm/llvm-project/commit/1be3669dda4d3c6a4aa3a36c2599f20a0a00c7ed.diff

LOG: [libc] add printf base 10 integer conversion

This patch adds support for d, i, and u conversions in printf, as well
as comprehensive unit tests.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D125929

Added: 
    libc/src/stdio/printf_core/int_converter.h

Modified: 
    libc/src/__support/CPP/Limits.h
    libc/src/stdio/printf_core/CMakeLists.txt
    libc/src/stdio/printf_core/char_converter.h
    libc/src/stdio/printf_core/converter.cpp
    libc/src/stdio/printf_core/converter_atlas.h
    libc/src/stdio/printf_core/string_converter.h
    libc/test/src/stdio/printf_core/converter_test.cpp
    libc/test/src/stdio/sprintf_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/__support/CPP/Limits.h b/libc/src/__support/CPP/Limits.h
index ba6c96fdef408..06104e99bbce9 100644

--- a/libc/src/__support/CPP/Limits.h
+++ b/libc/src/__support/CPP/Limits.h
@@ -52,6 +52,26 @@ template <> class NumericLimits<unsigned long long> {
   static constexpr unsigned long long max() { return ULLONG_MAX; }
   static constexpr unsigned long long min() { return 0; }
 };
+template <> class NumericLimits<short> {
+public:
+  static constexpr short max() { return SHRT_MAX; }
+  static constexpr short min() { return SHRT_MIN; }
+};
+template <> class NumericLimits<unsigned short> {
+public:
+  static constexpr unsigned short max() { return USHRT_MAX; }
+  static constexpr unsigned short min() { return 0; }
+};
+template <> class NumericLimits<char> {
+public:
+  static constexpr char max() { return CHAR_MAX; }
+  static constexpr char min() { return CHAR_MIN; }
+};
+template <> class NumericLimits<unsigned char> {
+public:
+  static constexpr unsigned char max() { return UCHAR_MAX; }
+  static constexpr unsigned char min() { return 0; }
+};
 #ifdef __SIZEOF_INT128__
 template <> class NumericLimits<__uint128_t> {
 public:

diff  --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 7a1e46907753e..8e2f162982362 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -55,6 +55,7 @@ add_object_library(
     converter_atlas.h
     string_converter.h
     char_converter.h
+    int_converter.h
   DEPENDS
     .writer
     .core_structs

diff  --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index d4ae6e04e5a90..d294be937cb8a 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -6,13 +6,16 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
 
 namespace __llvm_libc {
 namespace printf_core {
 
-void convert_char(Writer *writer, const FormatSection &to_conv) {
+void inline convert_char(Writer *writer, const FormatSection &to_conv) {
   char c = to_conv.conv_val_raw;
 
   if (to_conv.min_width > 1) {
@@ -31,3 +34,5 @@ void convert_char(Writer *writer, const FormatSection &to_conv) {
 
 } // namespace printf_core
 } // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H

diff  --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
index 856f85f82b703..65f0bfb39c9e7 100644
--- a/libc/src/stdio/printf_core/converter.cpp
+++ b/libc/src/stdio/printf_core/converter.cpp
@@ -42,7 +42,7 @@ void convert(Writer *writer, const FormatSection &to_conv) {
   case 'd':
   case 'i':
   case 'u':
-    // convert_int(writer, to_conv);
+    convert_int(writer, to_conv);
     return;
   case 'o':
     // convert_oct(writer, to_conv);

diff  --git a/libc/src/stdio/printf_core/converter_atlas.h b/libc/src/stdio/printf_core/converter_atlas.h
index 2c3b7998b2956..f140ce1ab4c1d 100644
--- a/libc/src/stdio/printf_core/converter_atlas.h
+++ b/libc/src/stdio/printf_core/converter_atlas.h
@@ -20,6 +20,8 @@
 #include "src/stdio/printf_core/char_converter.h"
 
 // defines convert_int
+#include "src/stdio/printf_core/int_converter.h"
+
 // defines convert_oct
 // defines convert_hex
 

diff  --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
new file mode 100644
index 0000000000000..4b40d474cddec
--- /dev/null
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -0,0 +1,174 @@
+//===-- Integer Converter for printf ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
+
+#include "src/__support/CPP/Limits.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+void inline convert_int(Writer *writer, const FormatSection &to_conv) {
+  static constexpr size_t BITS_IN_BYTE = 8;
+  static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE;
+
+  // This approximates the number of digits it takes to represent an integer of
+  // a certain number of bits. The calculation is floor((bits * 5) / 16)
+  // 32 -> 10 (actually needs 10)
+  // 64 -> 20 (actually needs 20)
+  // 128 -> 40 (actually needs 39)
+  // This estimation grows slightly faster than the actual value, but is close
+  // enough.
+
+  static constexpr size_t BUFF_LEN =
+      ((sizeof(uintmax_t) * BITS_IN_BYTE * 5) / 16);
+  uintmax_t num = to_conv.conv_val_raw;
+  char buffer[BUFF_LEN];
+  bool is_negative = false;
+  FormatFlags flags = to_conv.flags;
+
+  if (to_conv.conv_name == 'u') {
+    // These flags are only for signed conversions, so this removes them if the
+    // conversion is unsigned.
+    flags = FormatFlags(flags &
+                        ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
+  } else {
+    // Check if the number is negative by checking the high bit. This works even
+    // for smaller numbers because they're sign extended by default.
+    if ((num & (uintmax_t(1) << (BITS_IN_NUM - 1))) > 0) {
+      is_negative = true;
+      num = -num;
+    }
+  }
+
+  switch (to_conv.length_modifier) {
+  case LengthModifier::none:
+    num = num & cpp::NumericLimits<unsigned int>::max();
+    break;
+
+  case LengthModifier::l:
+    num = num & cpp::NumericLimits<unsigned long>::max();
+    break;
+  case LengthModifier::ll:
+  case LengthModifier::L:
+    num = num & cpp::NumericLimits<unsigned long long>::max();
+    break;
+  case LengthModifier::h:
+    num = num & cpp::NumericLimits<unsigned short>::max();
+    break;
+  case LengthModifier::hh:
+    num = num & cpp::NumericLimits<unsigned char>::max();
+    break;
+  case LengthModifier::z:
+    num = num & cpp::NumericLimits<size_t>::max();
+    break;
+  case LengthModifier::t:
+    // We don't have unsigned ptrdiff so uintptr_t is used, since we need an
+    // unsigned type and ptrdiff is usually the same size as a pointer.
+    static_assert(sizeof(ptrdiff_t) == sizeof(uintptr_t));
+    num = num & cpp::NumericLimits<uintptr_t>::max();
+    break;
+  case LengthModifier::j:
+    // j is intmax, so no mask is necessary.
+    break;
+  }
+
+  // buff_cur can never be decremented past 0, since the buffer is sized to
+  // always be able to contain the whole integer. This means that bounds
+  // checking it should be unnecessary.
+  size_t buff_cur = BUFF_LEN;
+  for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 10)
+    buffer[buff_cur - 1] = (num % 10) + '0';
+
+  size_t digits_written = BUFF_LEN - buff_cur;
+
+  char sign_char = 0;
+
+  if (is_negative)
+    sign_char = '-';
+  else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN)
+    sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX
+  else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
+    sign_char = ' ';
+
+  int sign_char_len = (sign_char == 0 ? 0 : 1);
+
+  // These are signed to prevent underflow due to negative values. The eventual
+  // values will always be non-negative.
+  int zeroes;
+  int spaces;
+
+  // Negative precision indicates that it was not specified.
+  if (to_conv.precision < 0) {
+    if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
+        FormatFlags::LEADING_ZEROES) {
+      // If this conv has flag 0 but not - and no specified precision, it's
+      // padded with 0's instead of spaces identically to if precision =
+      // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
+      zeroes = to_conv.min_width - digits_written - sign_char_len;
+      if (zeroes < 0)
+        zeroes = 0;
+      spaces = 0;
+    } else if (digits_written < 1) {
+      // If no precision is specified, precision defaults to 1. This means that
+      // if the integer passed to the conversion is 0, a 0 will be printed.
+      // Example: ("%3d", 0) -> "  0"
+      zeroes = 1;
+      spaces = to_conv.min_width - zeroes - sign_char_len;
+    } else {
+      // If there are enough digits to pass over the precision, just write the
+      // number, padded by spaces.
+      zeroes = 0;
+      spaces = to_conv.min_width - digits_written - sign_char_len;
+    }
+  } else {
+    // If precision was specified, possibly write zeroes, and possibly write
+    // spaces. Example: ("%5.4d", 10000) -> "10000"
+    // If the check for if zeroes is negative was not there, spaces would be
+    // incorrectly evaluated as 1.
+    zeroes = to_conv.precision - digits_written; // a negative value means 0
+    if (zeroes < 0)
+      zeroes = 0;
+    spaces = to_conv.min_width - zeroes - digits_written - sign_char_len;
+  }
+  if (spaces < 0)
+    spaces = 0;
+
+  if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
+    // If left justified it goes sign zeroes digits spaces
+    if (sign_char != 0)
+      writer->write(&sign_char, 1);
+    if (zeroes > 0)
+      writer->write_chars('0', zeroes);
+    if (digits_written > 0)
+      writer->write(buffer + buff_cur, digits_written);
+    if (spaces > 0)
+      writer->write_chars(' ', spaces);
+  } else {
+    // Else it goes spaces sign zeroes digits
+    if (spaces > 0)
+      writer->write_chars(' ', spaces);
+    if (sign_char != 0)
+      writer->write(&sign_char, 1);
+    if (zeroes > 0)
+      writer->write_chars('0', zeroes);
+    if (digits_written > 0)
+      writer->write(buffer + buff_cur, digits_written);
+  }
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H

diff  --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
index d3ceda283a5e7..3ca13ceb3e41d 100644
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -6,6 +6,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
+
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
 
@@ -14,7 +17,7 @@
 namespace __llvm_libc {
 namespace printf_core {
 
-void convert_string(Writer *writer, const FormatSection &to_conv) {
+void inline convert_string(Writer *writer, const FormatSection &to_conv) {
   int string_len = 0;
 
   for (char *cur_str = reinterpret_cast<char *>(to_conv.conv_val_ptr);
@@ -44,3 +47,5 @@ void convert_string(Writer *writer, const FormatSection &to_conv) {
 
 } // namespace printf_core
 } // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H

diff  --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 6a1e303dbe7e8..b3bc5e53817b7 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -181,3 +181,17 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionLeftJustified) {
   ASSERT_STREQ(str, "ghi ");
   ASSERT_EQ(writer.get_chars_written(), 4ull);
 }
+
+TEST_F(LlvmLibcPrintfConverterTest, IntConversionSimple) {
+  __llvm_libc::printf_core::FormatSection section;
+  section.has_conv = true;
+  section.raw_string = "%d";
+  section.conv_name = 'd';
+  section.conv_val_raw = 12345;
+  __llvm_libc::printf_core::convert(&writer, section);
+
+  str_writer.terminate();
+
+  ASSERT_STREQ(str, "12345");
+  ASSERT_EQ(writer.get_chars_written(), 5ull);
+}

diff  --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 5714bd95eb54a..38407de9b0f13 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -76,6 +76,151 @@ TEST(LlvmLibcSPrintfTest, StringConv) {
   ASSERT_STREQ(buff, " beginning is   important.");
 }
 
+TEST(LlvmLibcSPrintfTest, IntConv) {
+  char buff[64];
+  int written;
+
+  // Basic Tests.
+
+  written = __llvm_libc::sprintf(buff, "%d", 123);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "123");
+
+  written = __llvm_libc::sprintf(buff, "%i", -456);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, "-456");
+
+  // Length Modifier Tests.
+
+  written = __llvm_libc::sprintf(buff, "%hhu", 257); // 0x101
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "1");
+
+  written = __llvm_libc::sprintf(buff, "%llu", 18446744073709551615ull);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+  written = __llvm_libc::sprintf(buff, "%tu", ~ptrdiff_t(0));
+  if (sizeof(ptrdiff_t) == 8) {
+    EXPECT_EQ(written, 20);
+    ASSERT_STREQ(buff, "18446744073709551615");
+  } else if (sizeof(ptrdiff_t) == 4) {
+    EXPECT_EQ(written, 10);
+    ASSERT_STREQ(buff, "4294967295"); // 2^32 - 1, all 32 bits set
+  }
+
+  written = __llvm_libc::sprintf(buff, "%lld", -9223372036854775807ll - 1ll);
+  EXPECT_EQ(written, 20);
+  ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+  // Min Width Tests.
+
+  written = __llvm_libc::sprintf(buff, "%4d", 789);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, " 789");
+
+  written = __llvm_libc::sprintf(buff, "%2d", 987);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "987");
+
+  // Precision Tests.
+
+  written = __llvm_libc::sprintf(buff, "%d", 0);
+  EXPECT_EQ(written, 1);
+  ASSERT_STREQ(buff, "0");
+
+  written = __llvm_libc::sprintf(buff, "%.0d", 0);
+  EXPECT_EQ(written, 0);
+  ASSERT_STREQ(buff, "");
+
+  written = __llvm_libc::sprintf(buff, "%.5d", 654);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "00654");
+
+  written = __llvm_libc::sprintf(buff, "%.5d", -321);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "-00321");
+
+  written = __llvm_libc::sprintf(buff, "%.2d", 135);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "135");
+
+  // Flag Tests.
+
+  written = __llvm_libc::sprintf(buff, "%.5d", -321);
+  EXPECT_EQ(written, 6);
+  ASSERT_STREQ(buff, "-00321");
+
+  written = __llvm_libc::sprintf(buff, "%-5d", 246);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "246  ");
+
+  written = __llvm_libc::sprintf(buff, "%-5d", -147);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "-147 ");
+
+  written = __llvm_libc::sprintf(buff, "%+d", 258);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, "+258");
+
+  written = __llvm_libc::sprintf(buff, "% d", 369);
+  EXPECT_EQ(written, 4);
+  ASSERT_STREQ(buff, " 369");
+
+  written = __llvm_libc::sprintf(buff, "%05d", 470);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "00470");
+
+  written = __llvm_libc::sprintf(buff, "%05d", -581);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "-0581");
+
+  // Combined Tests.
+
+  written = __llvm_libc::sprintf(buff, "%+ u", 692);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "692");
+
+  written = __llvm_libc::sprintf(buff, "%+ -05d", 703);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "+703 ");
+
+  written = __llvm_libc::sprintf(buff, "%7.5d", 814);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "  00814");
+
+  written = __llvm_libc::sprintf(buff, "%7.5d", -925);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, " -00925");
+
+  written = __llvm_libc::sprintf(buff, "%7.5d", 159);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, "  00159");
+
+  written = __llvm_libc::sprintf(buff, "% -7.5d", 260);
+  EXPECT_EQ(written, 7);
+  ASSERT_STREQ(buff, " 00260 ");
+
+  written = __llvm_libc::sprintf(buff, "%5.4d", 10000);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "10000");
+
+  // Multiple Conversion Tests.
+
+  written = __llvm_libc::sprintf(buff, "%10d %-10d", 456, -789);
+  EXPECT_EQ(written, 21);
+  ASSERT_STREQ(buff, "       456 -789      ");
+
+  written = __llvm_libc::sprintf(buff, "%-5.4d%+.4u", 75, 25);
+  EXPECT_EQ(written, 9);
+  ASSERT_STREQ(buff, "0075 0025");
+
+  written = __llvm_libc::sprintf(buff, "% 05hhi %+-0.5llu %-+ 06.3zd",
+                                 256 + 127, 68719476736ll, size_t(2));
+  EXPECT_EQ(written, 24);
+  ASSERT_STREQ(buff, " 0127 68719476736 +002  ");
+}
+
 #ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
 TEST(LlvmLibcSPrintfTest, IndexModeParsing) {
   char buff[64];