[libc-commits] [libc] 1be3669 - [libc] add printf base 10 integer conversion
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Thu Jun 9 10:27:14 PDT 2022
Author: Michael Jones
Date: 2022-06-09T10:27:11-07:00
New Revision: 1be3669dda4d3c6a4aa3a36c2599f20a0a00c7ed
URL: https://github.com/llvm/llvm-project/commit/1be3669dda4d3c6a4aa3a36c2599f20a0a00c7ed
DIFF: https://github.com/llvm/llvm-project/commit/1be3669dda4d3c6a4aa3a36c2599f20a0a00c7ed.diff
LOG: [libc] add printf base 10 integer conversion
This patch adds support for d, i, and u conversions in printf, as well
as comprehensive unit tests.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D125929
Added:
libc/src/stdio/printf_core/int_converter.h
Modified:
libc/src/__support/CPP/Limits.h
libc/src/stdio/printf_core/CMakeLists.txt
libc/src/stdio/printf_core/char_converter.h
libc/src/stdio/printf_core/converter.cpp
libc/src/stdio/printf_core/converter_atlas.h
libc/src/stdio/printf_core/string_converter.h
libc/test/src/stdio/printf_core/converter_test.cpp
libc/test/src/stdio/sprintf_test.cpp
Removed:
################################################################################
diff --git a/libc/src/__support/CPP/Limits.h b/libc/src/__support/CPP/Limits.h
index ba6c96fdef408..06104e99bbce9 100644
--- a/libc/src/__support/CPP/Limits.h
+++ b/libc/src/__support/CPP/Limits.h
@@ -52,6 +52,26 @@ template <> class NumericLimits<unsigned long long> {
static constexpr unsigned long long max() { return ULLONG_MAX; }
static constexpr unsigned long long min() { return 0; }
};
+template <> class NumericLimits<short> {
+public:
+ static constexpr short max() { return SHRT_MAX; }
+ static constexpr short min() { return SHRT_MIN; }
+};
+template <> class NumericLimits<unsigned short> {
+public:
+ static constexpr unsigned short max() { return USHRT_MAX; }
+ static constexpr unsigned short min() { return 0; }
+};
+template <> class NumericLimits<char> {
+public:
+ static constexpr char max() { return CHAR_MAX; }
+ static constexpr char min() { return CHAR_MIN; }
+};
+template <> class NumericLimits<unsigned char> {
+public:
+ static constexpr unsigned char max() { return UCHAR_MAX; }
+ static constexpr unsigned char min() { return 0; }
+};
#ifdef __SIZEOF_INT128__
template <> class NumericLimits<__uint128_t> {
public:
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 7a1e46907753e..8e2f162982362 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -55,6 +55,7 @@ add_object_library(
converter_atlas.h
string_converter.h
char_converter.h
+ int_converter.h
DEPENDS
.writer
.core_structs
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index d4ae6e04e5a90..d294be937cb8a 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -6,13 +6,16 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
+
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
namespace __llvm_libc {
namespace printf_core {
-void convert_char(Writer *writer, const FormatSection &to_conv) {
+void inline convert_char(Writer *writer, const FormatSection &to_conv) {
char c = to_conv.conv_val_raw;
if (to_conv.min_width > 1) {
@@ -31,3 +34,5 @@ void convert_char(Writer *writer, const FormatSection &to_conv) {
} // namespace printf_core
} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
diff --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
index 856f85f82b703..65f0bfb39c9e7 100644
--- a/libc/src/stdio/printf_core/converter.cpp
+++ b/libc/src/stdio/printf_core/converter.cpp
@@ -42,7 +42,7 @@ void convert(Writer *writer, const FormatSection &to_conv) {
case 'd':
case 'i':
case 'u':
- // convert_int(writer, to_conv);
+ convert_int(writer, to_conv);
return;
case 'o':
// convert_oct(writer, to_conv);
diff --git a/libc/src/stdio/printf_core/converter_atlas.h b/libc/src/stdio/printf_core/converter_atlas.h
index 2c3b7998b2956..f140ce1ab4c1d 100644
--- a/libc/src/stdio/printf_core/converter_atlas.h
+++ b/libc/src/stdio/printf_core/converter_atlas.h
@@ -20,6 +20,8 @@
#include "src/stdio/printf_core/char_converter.h"
// defines convert_int
+#include "src/stdio/printf_core/int_converter.h"
+
// defines convert_oct
// defines convert_hex
diff --git a/libc/src/stdio/printf_core/int_converter.h b/libc/src/stdio/printf_core/int_converter.h
new file mode 100644
index 0000000000000..4b40d474cddec
--- /dev/null
+++ b/libc/src/stdio/printf_core/int_converter.h
@@ -0,0 +1,174 @@
+//===-- Integer Converter for printf ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
+
+#include "src/__support/CPP/Limits.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <inttypes.h>
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+void inline convert_int(Writer *writer, const FormatSection &to_conv) {
+ static constexpr size_t BITS_IN_BYTE = 8;
+ static constexpr size_t BITS_IN_NUM = sizeof(uintmax_t) * BITS_IN_BYTE;
+
+ // This approximates the number of digits it takes to represent an integer of
+ // a certain number of bits. The calculation is floor((bits * 5) / 16)
+ // 32 -> 10 (actually needs 10)
+ // 64 -> 20 (actually needs 20)
+ // 128 -> 40 (actually needs 39)
+ // This estimation grows slightly faster than the actual value, but is close
+ // enough.
+
+ static constexpr size_t BUFF_LEN =
+ ((sizeof(uintmax_t) * BITS_IN_BYTE * 5) / 16);
+ uintmax_t num = to_conv.conv_val_raw;
+ char buffer[BUFF_LEN];
+ bool is_negative = false;
+ FormatFlags flags = to_conv.flags;
+
+ if (to_conv.conv_name == 'u') {
+ // These flags are only for signed conversions, so this removes them if the
+ // conversion is unsigned.
+ flags = FormatFlags(flags &
+ ~(FormatFlags::FORCE_SIGN | FormatFlags::SPACE_PREFIX));
+ } else {
+ // Check if the number is negative by checking the high bit. This works even
+ // for smaller numbers because they're sign extended by default.
+ if ((num & (uintmax_t(1) << (BITS_IN_NUM - 1))) > 0) {
+ is_negative = true;
+ num = -num;
+ }
+ }
+
+ switch (to_conv.length_modifier) {
+ case LengthModifier::none:
+ num = num & cpp::NumericLimits<unsigned int>::max();
+ break;
+
+ case LengthModifier::l:
+ num = num & cpp::NumericLimits<unsigned long>::max();
+ break;
+ case LengthModifier::ll:
+ case LengthModifier::L:
+ num = num & cpp::NumericLimits<unsigned long long>::max();
+ break;
+ case LengthModifier::h:
+ num = num & cpp::NumericLimits<unsigned short>::max();
+ break;
+ case LengthModifier::hh:
+ num = num & cpp::NumericLimits<unsigned char>::max();
+ break;
+ case LengthModifier::z:
+ num = num & cpp::NumericLimits<size_t>::max();
+ break;
+ case LengthModifier::t:
+ // We don't have unsigned ptrdiff so uintptr_t is used, since we need an
+ // unsigned type and ptrdiff is usually the same size as a pointer.
+ static_assert(sizeof(ptrdiff_t) == sizeof(uintptr_t));
+ num = num & cpp::NumericLimits<uintptr_t>::max();
+ break;
+ case LengthModifier::j:
+ // j is intmax, so no mask is necessary.
+ break;
+ }
+
+ // buff_cur can never reach 0, since the buffer is sized to always be able to
+ // contain the whole integer. This means that bounds checking it should be
+ // unnecessary.
+ size_t buff_cur = BUFF_LEN;
+ for (; num > 0 /* && buff_cur > 0 */; --buff_cur, num /= 10)
+ buffer[buff_cur - 1] = (num % 10) + '0';
+
+ size_t digits_written = BUFF_LEN - buff_cur;
+
+ char sign_char = 0;
+
+ if (is_negative)
+ sign_char = '-';
+ else if ((flags & FormatFlags::FORCE_SIGN) == FormatFlags::FORCE_SIGN)
+ sign_char = '+'; // FORCE_SIGN has precedence over SPACE_PREFIX
+ else if ((flags & FormatFlags::SPACE_PREFIX) == FormatFlags::SPACE_PREFIX)
+ sign_char = ' ';
+
+ int sign_char_len = (sign_char == 0 ? 0 : 1);
+
+ // These are signed to prevent underflow due to negative values. The eventual
+ // values will always be non-negative.
+ int zeroes;
+ int spaces;
+
+ // Negative precision indicates that it was not specified.
+ if (to_conv.precision < 0) {
+ if ((flags & (FormatFlags::LEADING_ZEROES | FormatFlags::LEFT_JUSTIFIED)) ==
+ FormatFlags::LEADING_ZEROES) {
+ // If this conv has flag 0 but not - and no specified precision, it's
+ // padded with 0's instead of spaces identically to if precision =
+ // min_width - (1 if sign_char). For example: ("%+04d", 1) -> "+001"
+ zeroes = to_conv.min_width - digits_written - sign_char_len;
+ if (zeroes < 0)
+ zeroes = 0;
+ spaces = 0;
+ } else if (digits_written < 1) {
+ // If no precision is specified, precision defaults to 1. This means that
+ // if the integer passed to the conversion is 0, a 0 will be printed.
+ // Example: ("%3d", 0) -> " 0"
+ zeroes = 1;
+ spaces = to_conv.min_width - zeroes - sign_char_len;
+ } else {
+ // If there are enough digits to pass over the precision, just write the
+ // number, padded by spaces.
+ zeroes = 0;
+ spaces = to_conv.min_width - digits_written - sign_char_len;
+ }
+ } else {
+ // If precision was specified, possibly write zeroes, and possibly write
+ // spaces. Example: ("%5.4d", 10000) -> "10000"
+ // If the check for if zeroes is negative was not there, spaces would be
+ // incorrectly evaluated as 1.
+ zeroes = to_conv.precision - digits_written; // a negative value means 0
+ if (zeroes < 0)
+ zeroes = 0;
+ spaces = to_conv.min_width - zeroes - digits_written - sign_char_len;
+ }
+ if (spaces < 0)
+ spaces = 0;
+
+ if ((flags & FormatFlags::LEFT_JUSTIFIED) == FormatFlags::LEFT_JUSTIFIED) {
+ // If left justified it goes sign zeroes digits spaces
+ if (sign_char != 0)
+ writer->write(&sign_char, 1);
+ if (zeroes > 0)
+ writer->write_chars('0', zeroes);
+ if (digits_written > 0)
+ writer->write(buffer + buff_cur, digits_written);
+ if (spaces > 0)
+ writer->write_chars(' ', spaces);
+ } else {
+ // Else it goes spaces sign zeroes digits
+ if (spaces > 0)
+ writer->write_chars(' ', spaces);
+ if (sign_char != 0)
+ writer->write(&sign_char, 1);
+ if (zeroes > 0)
+ writer->write_chars('0', zeroes);
+ if (digits_written > 0)
+ writer->write(buffer + buff_cur, digits_written);
+ }
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_INT_CONVERTER_H
diff --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
index d3ceda283a5e7..3ca13ceb3e41d 100644
--- a/libc/src/stdio/printf_core/string_converter.h
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -6,6 +6,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
+
#include "src/stdio/printf_core/core_structs.h"
#include "src/stdio/printf_core/writer.h"
@@ -14,7 +17,7 @@
namespace __llvm_libc {
namespace printf_core {
-void convert_string(Writer *writer, const FormatSection &to_conv) {
+void inline convert_string(Writer *writer, const FormatSection &to_conv) {
int string_len = 0;
for (char *cur_str = reinterpret_cast<char *>(to_conv.conv_val_ptr);
@@ -44,3 +47,5 @@ void convert_string(Writer *writer, const FormatSection &to_conv) {
} // namespace printf_core
} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_STRING_CONVERTER_H
diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 6a1e303dbe7e8..b3bc5e53817b7 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -181,3 +181,17 @@ TEST_F(LlvmLibcPrintfConverterTest, StringConversionLeftJustified) {
ASSERT_STREQ(str, "ghi ");
ASSERT_EQ(writer.get_chars_written(), 4ull);
}
+
+TEST_F(LlvmLibcPrintfConverterTest, IntConversionSimple) {
+ __llvm_libc::printf_core::FormatSection section;
+ section.has_conv = true;
+ section.raw_string = "%d";
+ section.conv_name = 'd';
+ section.conv_val_raw = 12345;
+ __llvm_libc::printf_core::convert(&writer, section);
+
+ str_writer.terminate();
+
+ ASSERT_STREQ(str, "12345");
+ ASSERT_EQ(writer.get_chars_written(), 5ull);
+}
diff --git a/libc/test/src/stdio/sprintf_test.cpp b/libc/test/src/stdio/sprintf_test.cpp
index 5714bd95eb54a..38407de9b0f13 100644
--- a/libc/test/src/stdio/sprintf_test.cpp
+++ b/libc/test/src/stdio/sprintf_test.cpp
@@ -76,6 +76,151 @@ TEST(LlvmLibcSPrintfTest, StringConv) {
ASSERT_STREQ(buff, " beginning is important.");
}
+TEST(LlvmLibcSPrintfTest, IntConv) {
+ char buff[64];
+ int written;
+
+ // Basic Tests.
+
+ written = __llvm_libc::sprintf(buff, "%d", 123);
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ(buff, "123");
+
+ written = __llvm_libc::sprintf(buff, "%i", -456);
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ(buff, "-456");
+
+ // Length Modifier Tests.
+
+ written = __llvm_libc::sprintf(buff, "%hhu", 257); // 0x101
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "1");
+
+ written = __llvm_libc::sprintf(buff, "%llu", 18446744073709551615ull);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615"); // ull max
+
+ written = __llvm_libc::sprintf(buff, "%tu", ~ptrdiff_t(0));
+ if (sizeof(ptrdiff_t) == 8) {
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "18446744073709551615");
+ } else if (sizeof(ptrdiff_t) == 4) {
+ EXPECT_EQ(written, 10);
+ ASSERT_STREQ(buff, "4294967296");
+ }
+
+ written = __llvm_libc::sprintf(buff, "%lld", -9223372036854775807ll - 1ll);
+ EXPECT_EQ(written, 20);
+ ASSERT_STREQ(buff, "-9223372036854775808"); // ll min
+
+ // Min Width Tests.
+
+ written = __llvm_libc::sprintf(buff, "%4d", 789);
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ(buff, " 789");
+
+ written = __llvm_libc::sprintf(buff, "%2d", 987);
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ(buff, "987");
+
+ // Precision Tests.
+
+ written = __llvm_libc::sprintf(buff, "%d", 0);
+ EXPECT_EQ(written, 1);
+ ASSERT_STREQ(buff, "0");
+
+ written = __llvm_libc::sprintf(buff, "%.0d", 0);
+ EXPECT_EQ(written, 0);
+ ASSERT_STREQ(buff, "");
+
+ written = __llvm_libc::sprintf(buff, "%.5d", 654);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "00654");
+
+ written = __llvm_libc::sprintf(buff, "%.5d", -321);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ(buff, "-00321");
+
+ written = __llvm_libc::sprintf(buff, "%.2d", 135);
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ(buff, "135");
+
+ // Flag Tests.
+
+ written = __llvm_libc::sprintf(buff, "%.5d", -321);
+ EXPECT_EQ(written, 6);
+ ASSERT_STREQ(buff, "-00321");
+
+ written = __llvm_libc::sprintf(buff, "%-5d", 246);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "246 ");
+
+ written = __llvm_libc::sprintf(buff, "%-5d", -147);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "-147 ");
+
+ written = __llvm_libc::sprintf(buff, "%+d", 258);
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ(buff, "+258");
+
+ written = __llvm_libc::sprintf(buff, "% d", 369);
+ EXPECT_EQ(written, 4);
+ ASSERT_STREQ(buff, " 369");
+
+ written = __llvm_libc::sprintf(buff, "%05d", 470);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "00470");
+
+ written = __llvm_libc::sprintf(buff, "%05d", -581);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "-0581");
+
+ // Combined Tests.
+
+ written = __llvm_libc::sprintf(buff, "%+ u", 692);
+ EXPECT_EQ(written, 3);
+ ASSERT_STREQ(buff, "692");
+
+ written = __llvm_libc::sprintf(buff, "%+ -05d", 703);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "+703 ");
+
+ written = __llvm_libc::sprintf(buff, "%7.5d", 814);
+ EXPECT_EQ(written, 7);
+ ASSERT_STREQ(buff, " 00814");
+
+ written = __llvm_libc::sprintf(buff, "%7.5d", -925);
+ EXPECT_EQ(written, 7);
+ ASSERT_STREQ(buff, " -00925");
+
+ written = __llvm_libc::sprintf(buff, "%7.5d", 159);
+ EXPECT_EQ(written, 7);
+ ASSERT_STREQ(buff, " 00159");
+
+ written = __llvm_libc::sprintf(buff, "% -7.5d", 260);
+ EXPECT_EQ(written, 7);
+ ASSERT_STREQ(buff, " 00260 ");
+
+ written = __llvm_libc::sprintf(buff, "%5.4d", 10000);
+ EXPECT_EQ(written, 5);
+ ASSERT_STREQ(buff, "10000");
+
+ // Multiple Conversion Tests.
+
+ written = __llvm_libc::sprintf(buff, "%10d %-10d", 456, -789);
+ EXPECT_EQ(written, 21);
+ ASSERT_STREQ(buff, " 456 -789 ");
+
+ written = __llvm_libc::sprintf(buff, "%-5.4d%+.4u", 75, 25);
+ EXPECT_EQ(written, 9);
+ ASSERT_STREQ(buff, "0075 0025");
+
+ written = __llvm_libc::sprintf(buff, "% 05hhi %+-0.5llu %-+ 06.3zd",
+ 256 + 127, 68719476736ll, size_t(2));
+ EXPECT_EQ(written, 24);
+ ASSERT_STREQ(buff, " 0127 68719476736 +002 ");
+}
+
#ifndef LLVM_LIBC_PRINTF_DISABLE_INDEX_MODE
TEST(LlvmLibcSPrintfTest, IndexModeParsing) {
char buff[64];
More information about the libc-commits
mailing list