[libc-commits] [libc] 6a22b18 - [libc] add printf converter

Michael Jones via libc-commits libc-commits at lists.llvm.org
Thu May 12 13:10:10 PDT 2022


Author: Michael Jones
Date: 2022-05-12T13:10:05-07:00
New Revision: 6a22b185d6f9461209947685e521a8f9a28bd983

URL: https://github.com/llvm/llvm-project/commit/6a22b185d6f9461209947685e521a8f9a28bd983
DIFF: https://github.com/llvm/llvm-project/commit/6a22b185d6f9461209947685e521a8f9a28bd983.diff

LOG: [libc] add printf converter

This adds the main pieces of the last piece of printf, the converter.
This takes the completed format section from the parser and then
converts it to a string for the writer, which is why it was the last
piece to be written. So far it supports chars and strings, but more
pieces are coming. Additionally, it supports replacing all of the
conversion functions with user supplied versions at compile time to
allow for additional functionality.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D125327

Added: 
    libc/src/stdio/printf_core/char_converter.h
    libc/src/stdio/printf_core/converter.cpp
    libc/src/stdio/printf_core/converter_atlas.h
    libc/src/stdio/printf_core/string_converter.h
    libc/test/src/stdio/printf_core/converter_test.cpp

Modified: 
    libc/src/stdio/printf_core/CMakeLists.txt
    libc/src/stdio/printf_core/converter.h
    libc/src/stdio/printf_core/core_structs.h
    libc/src/stdio/printf_core/parser.cpp
    libc/src/stdio/printf_core/printf_main.h
    libc/test/src/stdio/printf_core/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 965bbcd14da8..a386afcdf684 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -37,3 +37,17 @@ add_object_library(
   DEPENDS
     libc.src.string.memory_utils.memset_implementation
 )
+
+add_object_library(
+  converter
+  SRCS
+    converter.cpp
+  HDRS
+    converter.h
+    converter_atlas.h
+    string_converter.h
+    char_converter.h
+  DEPENDS
+    .writer
+    .core_structs
+)

diff  --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
new file mode 100644
index 000000000000..45740455610a
--- /dev/null
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -0,0 +1,33 @@
+//===-- String Converter for printf -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+namespace __llvm_libc {
+namespace printf_core {
+
+void convert_char(Writer *writer, FormatSection to_conv) {
+  char c = to_conv.conv_val_raw;
+
+  if (to_conv.min_width > 1) {
+    if ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
+        FormatFlags::LEFT_JUSTIFIED) {
+      writer->write(&c, 1);
+      writer->write_chars(' ', to_conv.min_width - 1);
+    } else {
+      writer->write_chars(' ', to_conv.min_width - 1);
+      writer->write(&c, 1);
+    }
+  } else {
+    writer->write(&c, 1);
+  }
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc

diff  --git a/libc/src/stdio/printf_core/converter.cpp b/libc/src/stdio/printf_core/converter.cpp
new file mode 100644
index 000000000000..cc684b789bd5
--- /dev/null
+++ b/libc/src/stdio/printf_core/converter.cpp
@@ -0,0 +1,85 @@
+//===-- Format specifier converter implmentation for printf -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/printf_core/converter.h"
+
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+// This option allows for replacing all of the conversion functions with custom
+// replacements. This allows conversions to be replaced at compile time.
+#ifndef LLVM_LIBC_PRINTF_CONV_ATLAS
+#include "src/stdio/printf_core/converter_atlas.h"
+#else
+#include LLVM_LIBC_PRINTF_CONV_ATLAS
+#endif
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+void convert(Writer *writer, FormatSection to_conv) {
+  if (!to_conv.has_conv) {
+    writer->write(to_conv.raw_string, to_conv.raw_len);
+    return;
+  }
+  switch (to_conv.conv_name) {
+  case '%':
+    writer->write("%", 1);
+    return;
+  case 'c':
+    convert_char(writer, to_conv);
+    return;
+  case 's':
+    convert_string(writer, to_conv);
+    return;
+  case 'd':
+  case 'i':
+  case 'u':
+    // convert_int(writer, to_conv);
+    return;
+  case 'o':
+    // convert_oct(writer, to_conv);
+    return;
+  case 'x':
+  case 'X':
+    // convert_hex(writer, to_conv);
+    return;
+  // TODO(michaelrj): add a flag to disable float point values here
+  case 'f':
+  case 'F':
+    // convert_float_decimal(writer, to_conv);
+    return;
+  case 'e':
+  case 'E':
+    // convert_float_dec_exp(writer, to_conv);
+    return;
+  case 'a':
+  case 'A':
+    // convert_float_hex_exp(writer, to_conv);
+    return;
+  case 'g':
+  case 'G':
+    // convert_float_mixed(writer, to_conv);
+    return;
+  // TODO(michaelrj): add a flag to disable writing an int here
+  case 'n':
+    // convert_write_int(writer, to_conv);
+    return;
+  case 'p':
+    // convert_pointer(writer, to_conv);
+    return;
+  default:
+    writer->write(to_conv.raw_string, to_conv.raw_len);
+    return;
+  }
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc

diff  --git a/libc/src/stdio/printf_core/converter.h b/libc/src/stdio/printf_core/converter.h
index 114a409f8529..15e4e8d80f5d 100644
--- a/libc/src/stdio/printf_core/converter.h
+++ b/libc/src/stdio/printf_core/converter.h
@@ -17,17 +17,10 @@
 namespace __llvm_libc {
 namespace printf_core {
 
-class Converter {
-  Writer *writer;
-
-public:
-  Converter(Writer *writer);
-
-  // convert will call a conversion function to convert the FormatSection into
-  // its string representation, and then that will write the result to the
-  // writer.
-  void convert(FormatSection to_conv);
-};
+// convert will call a conversion function to convert the FormatSection into
+// its string representation, and then that will write the result to the
+// writer.
+void convert(Writer *writer, FormatSection to_conv);
 
 } // namespace printf_core
 } // namespace __llvm_libc

diff  --git a/libc/src/stdio/printf_core/converter_atlas.h b/libc/src/stdio/printf_core/converter_atlas.h
new file mode 100644
index 000000000000..2c3b7998b295
--- /dev/null
+++ b/libc/src/stdio/printf_core/converter_atlas.h
@@ -0,0 +1,37 @@
+//===-- Map of converter headers in printf ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This file exists so that if the user wants to supply a custom atlas they can
+// just replace the #include, additionally it keeps the ifdefs out of the
+// converter header.
+
+#ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_ATLAS_H
+#define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_ATLAS_H
+
+// defines convert_string
+#include "src/stdio/printf_core/string_converter.h"
+
+// defines convert_char
+#include "src/stdio/printf_core/char_converter.h"
+
+// defines convert_int
+// defines convert_oct
+// defines convert_hex
+
+// TODO(michaelrj): add a flag to disable float point values here
+// defines convert_float_decimal
+// defines convert_float_dec_exp
+// defines convert_float_hex_exp
+// defines convert_float_mixed
+
+// TODO(michaelrj): add a flag to disable writing an int here
+// defines convert_write_int
+
+// defines convert_pointer
+
+#endif // LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CONVERTER_ATLAS_H

diff  --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h
index deb348f66516..80f187102bc1 100644
--- a/libc/src/stdio/printf_core/core_structs.h
+++ b/libc/src/stdio/printf_core/core_structs.h
@@ -10,6 +10,7 @@
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H
 
 #include "src/__support/CPP/StringView.h"
+#include "src/__support/FPUtil/FPBits.h"
 
 #include <inttypes.h>
 #include <stddef.h>
@@ -45,7 +46,8 @@ struct FormatSection {
   int min_width = 0;
   int precision = -1;
 
-  __uint128_t conv_val_raw; // Needs to be large enough to hold a long double.
+  // Needs to be large enough to hold a long double.
+  fputil::FPBits<long double>::UIntType conv_val_raw;
   void *conv_val_ptr;
 
   char conv_name;

diff  --git a/libc/src/stdio/printf_core/parser.cpp b/libc/src/stdio/printf_core/parser.cpp
index a373f38536b5..c1f8ea90624b 100644
--- a/libc/src/stdio/printf_core/parser.cpp
+++ b/libc/src/stdio/printf_core/parser.cpp
@@ -13,6 +13,7 @@
 #include "src/__support/arg_list.h"
 
 #include "src/__support/CPP/Bit.h"
+#include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/ctype_utils.h"
 #include "src/__support/str_to_integer.h"
 
@@ -120,6 +121,7 @@ FormatSection Parser::get_next_section() {
         break;
       }
       break;
+      // TODO(michaelrj): add a flag to disable float point values here
     case ('f'):
     case ('F'):
     case ('e'):
@@ -132,7 +134,7 @@ FormatSection Parser::get_next_section() {
         section.conv_val_raw =
             bit_cast<uint64_t>(GET_ARG_VAL_SIMPLEST(double, conv_index));
       else
-        section.conv_val_raw = bit_cast<__uint128_t>(
+        section.conv_val_raw = bit_cast<fputil::FPBits<long double>::UIntType>(
             GET_ARG_VAL_SIMPLEST(long double, conv_index));
       break;
     case ('n'):
@@ -335,6 +337,7 @@ Parser::TypeDesc Parser::get_type_desc(size_t index) {
           break;
         }
         break;
+      // TODO(michaelrj): add a flag to disable float point values here
       case ('f'):
       case ('F'):
       case ('e'):
@@ -388,6 +391,7 @@ void Parser::args_to_index(size_t index) {
       args_cur.next_var<uint32_t>();
     else if (cur_type_desc == TYPE_DESC<uint64_t>)
       args_cur.next_var<uint64_t>();
+    // TODO(michaelrj): add a flag to disable float point values here
     // Floating point numbers are stored separately from the other arguments.
     else if (cur_type_desc == TYPE_DESC<double>)
       args_cur.next_var<double>();

diff  --git a/libc/src/stdio/printf_core/printf_main.h b/libc/src/stdio/printf_core/printf_main.h
index b03683513770..78057f0a0ff7 100644
--- a/libc/src/stdio/printf_core/printf_main.h
+++ b/libc/src/stdio/printf_core/printf_main.h
@@ -23,12 +23,11 @@ namespace printf_core {
 int printf_main(Writer *writer, const char *__restrict str,
                 internal::ArgList args) {
   Parser parser(str, args);
-  Converter converter(writer);
 
   for (FormatSection cur_section = parser.get_next_section();
        cur_section.raw_len > 0; cur_section = parser.get_next_section()) {
     if (cur_section.has_conv)
-      converter.convert(cur_section);
+      convert(writer, cur_section);
     else
       writer->write(cur_section.raw_string, cur_section.raw_len);
   }

diff  --git a/libc/src/stdio/printf_core/string_converter.h b/libc/src/stdio/printf_core/string_converter.h
new file mode 100644
index 000000000000..b0a918869841
--- /dev/null
+++ b/libc/src/stdio/printf_core/string_converter.h
@@ -0,0 +1,46 @@
+//===-- String Converter for printf -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace printf_core {
+
+void convert_string(Writer *writer, FormatSection to_conv) {
+  int string_len = 0;
+
+  for (char *cur_str = reinterpret_cast<char *>(to_conv.conv_val_ptr);
+       cur_str[string_len]; ++string_len) {
+    ;
+  }
+
+  if (to_conv.precision >= 0 && to_conv.precision < string_len)
+    string_len = to_conv.precision;
+
+  if (to_conv.min_width > string_len) {
+    if ((to_conv.flags & FormatFlags::LEFT_JUSTIFIED) ==
+        FormatFlags::LEFT_JUSTIFIED) {
+      writer->write(reinterpret_cast<const char *>(to_conv.conv_val_ptr),
+                    string_len);
+      writer->write_chars(' ', to_conv.min_width - string_len);
+    } else {
+      writer->write_chars(' ', to_conv.min_width - string_len);
+      writer->write(reinterpret_cast<const char *>(to_conv.conv_val_ptr),
+                    string_len);
+    }
+  } else {
+    writer->write(reinterpret_cast<const char *>(to_conv.conv_val_ptr),
+                  string_len);
+  }
+}
+
+} // namespace printf_core
+} // namespace __llvm_libc

diff  --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt
index 2998e1d22183..692a39621534 100644
--- a/libc/test/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/printf_core/CMakeLists.txt
@@ -6,6 +6,7 @@ add_libc_unittest(
     parser_test.cpp
   DEPENDS
     libc.src.stdio.printf_core.parser
+    libc.src.stdio.printf_core.core_structs
     libc.src.__support.arg_list
   LINK_LIBRARIES
     LibcPrintfHelpers
@@ -21,3 +22,16 @@ add_libc_unittest(
     libc.src.stdio.printf_core.writer
     libc.src.stdio.printf_core.string_writer
 )
+
+add_libc_unittest(
+  converter_test
+  SUITE
+    libc_stdio_unittests
+  SRCS
+    converter_test.cpp
+  DEPENDS
+    libc.src.stdio.printf_core.converter
+    libc.src.stdio.printf_core.writer
+    libc.src.stdio.printf_core.string_writer
+    libc.src.stdio.printf_core.core_structs
+)

diff  --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
new file mode 100644
index 000000000000..3cacc24ea61e
--- /dev/null
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -0,0 +1,189 @@
+//===-- Unittests for the printf Converter --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdio/printf_core/converter.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/string_writer.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include "utils/UnitTest/Test.h"
+
+TEST(LlvmLibcPrintfConverterTest, SimpleRawConversion) {
+  char str[10];
+  __llvm_libc::printf_core::StringWriter str_writer(str);
+  __llvm_libc::printf_core::Writer writer(
+      reinterpret_cast<void *>(&str_writer),
+      __llvm_libc::printf_core::write_to_string);
+
+  __llvm_libc::printf_core::FormatSection raw_section;
+  raw_section.has_conv = false;
+  raw_section.raw_string = "abc";
+  raw_section.raw_len = 3;
+
+  __llvm_libc::printf_core::convert(&writer, raw_section);
+
+  str_writer.terminate();
+
+  ASSERT_STREQ(str, "abc");
+  ASSERT_EQ(writer.get_chars_written(), 3ull);
+}
+
+TEST(LlvmLibcPrintfConverterTest, PercentConversion) {
+  char str[20];
+  __llvm_libc::printf_core::StringWriter str_writer(str);
+  __llvm_libc::printf_core::Writer writer(
+      reinterpret_cast<void *>(&str_writer),
+      __llvm_libc::printf_core::write_to_string);
+
+  __llvm_libc::printf_core::FormatSection simple_conv;
+  simple_conv.has_conv = true;
+  simple_conv.raw_string = "abc123";
+  simple_conv.raw_len = 6;
+  simple_conv.conv_name = '%';
+
+  __llvm_libc::printf_core::convert(&writer, simple_conv);
+
+  str[1] = '\0';
+
+  ASSERT_STREQ(str, "%");
+  ASSERT_EQ(writer.get_chars_written(), 1ull);
+}
+
+TEST(LlvmLibcPrintfConverterTest, CharConversion) {
+  char str[20];
+  __llvm_libc::printf_core::StringWriter str_writer(str);
+  __llvm_libc::printf_core::Writer writer(
+      reinterpret_cast<void *>(&str_writer),
+      __llvm_libc::printf_core::write_to_string);
+
+  __llvm_libc::printf_core::FormatSection simple_conv;
+  simple_conv.has_conv = true;
+  simple_conv.raw_string = "abc123";
+  simple_conv.raw_len = 6;
+  simple_conv.conv_name = 'c';
+  simple_conv.conv_val_raw = 'D';
+
+  __llvm_libc::printf_core::convert(&writer, simple_conv);
+
+  str[1] = '\0';
+
+  ASSERT_STREQ(str, "D");
+  ASSERT_EQ(writer.get_chars_written(), 1ull);
+
+  __llvm_libc::printf_core::FormatSection right_justified_conv;
+  right_justified_conv.has_conv = true;
+  right_justified_conv.raw_string = "abc123";
+  right_justified_conv.raw_len = 6;
+  right_justified_conv.conv_name = 'c';
+  right_justified_conv.min_width = 4;
+  right_justified_conv.conv_val_raw = 'E';
+  __llvm_libc::printf_core::convert(&writer, right_justified_conv);
+
+  str[5] = '\0';
+
+  ASSERT_STREQ(str, "D   E");
+  ASSERT_EQ(writer.get_chars_written(), 5ull);
+
+  __llvm_libc::printf_core::FormatSection left_justified_conv;
+  left_justified_conv.has_conv = true;
+  left_justified_conv.raw_string = "abc123";
+  left_justified_conv.raw_len = 6;
+  left_justified_conv.conv_name = 'c';
+  left_justified_conv.flags =
+      __llvm_libc::printf_core::FormatFlags::LEFT_JUSTIFIED;
+  left_justified_conv.min_width = 4;
+  left_justified_conv.conv_val_raw = 'F';
+  __llvm_libc::printf_core::convert(&writer, left_justified_conv);
+
+  str[9] = '\0';
+
+  ASSERT_STREQ(str, "D   EF   ");
+  ASSERT_EQ(writer.get_chars_written(), 9ull);
+}
+
+TEST(LlvmLibcPrintfConverterTest, StringConversion) {
+  char str[20];
+  __llvm_libc::printf_core::StringWriter str_writer(str);
+  __llvm_libc::printf_core::Writer writer(
+      reinterpret_cast<void *>(&str_writer),
+      __llvm_libc::printf_core::write_to_string);
+
+  __llvm_libc::printf_core::FormatSection simple_conv;
+  simple_conv.has_conv = true;
+  simple_conv.raw_string = "abc123";
+  simple_conv.raw_len = 6;
+  simple_conv.conv_name = 's';
+  simple_conv.conv_val_ptr = const_cast<char *>("DEF");
+
+  __llvm_libc::printf_core::convert(&writer, simple_conv);
+
+  str[3] = '\0'; // this null terminator is just for checking after every step.
+
+  ASSERT_STREQ(str, "DEF");
+  ASSERT_EQ(writer.get_chars_written(), 3ull);
+
+  // continuing to write to this str_writer will overwrite that null terminator.
+
+  __llvm_libc::printf_core::FormatSection high_precision_conv;
+  high_precision_conv.has_conv = true;
+  high_precision_conv.raw_string = "abc123";
+  high_precision_conv.raw_len = 6;
+  high_precision_conv.conv_name = 's';
+  high_precision_conv.precision = 4;
+  high_precision_conv.conv_val_ptr = const_cast<char *>("456");
+  __llvm_libc::printf_core::convert(&writer, high_precision_conv);
+
+  str[6] = '\0';
+
+  ASSERT_STREQ(str, "DEF456");
+  ASSERT_EQ(writer.get_chars_written(), 6ull);
+
+  __llvm_libc::printf_core::FormatSection low_precision_conv;
+  low_precision_conv.has_conv = true;
+  low_precision_conv.raw_string = "abc123";
+  low_precision_conv.raw_len = 6;
+  low_precision_conv.conv_name = 's';
+  low_precision_conv.precision = 2;
+  low_precision_conv.conv_val_ptr = const_cast<char *>("xyz");
+  __llvm_libc::printf_core::convert(&writer, low_precision_conv);
+
+  str[8] = '\0';
+
+  ASSERT_STREQ(str, "DEF456xy");
+  ASSERT_EQ(writer.get_chars_written(), 8ull);
+
+  __llvm_libc::printf_core::FormatSection right_justified_conv;
+  right_justified_conv.has_conv = true;
+  right_justified_conv.raw_string = "abc123";
+  right_justified_conv.raw_len = 6;
+  right_justified_conv.conv_name = 's';
+  right_justified_conv.min_width = 4;
+  right_justified_conv.conv_val_ptr = const_cast<char *>("789");
+  __llvm_libc::printf_core::convert(&writer, right_justified_conv);
+
+  str[12] = '\0';
+
+  ASSERT_STREQ(str, "DEF456xy 789");
+  ASSERT_EQ(writer.get_chars_written(), 12ull);
+
+  __llvm_libc::printf_core::FormatSection left_justified_conv;
+  left_justified_conv.has_conv = true;
+  left_justified_conv.raw_string = "abc123";
+  left_justified_conv.raw_len = 6;
+  left_justified_conv.conv_name = 's';
+  left_justified_conv.flags =
+      __llvm_libc::printf_core::FormatFlags::LEFT_JUSTIFIED;
+  left_justified_conv.min_width = 4;
+  left_justified_conv.conv_val_ptr = const_cast<char *>("ghi");
+  __llvm_libc::printf_core::convert(&writer, left_justified_conv);
+
+  str[16] = '\0';
+
+  ASSERT_STREQ(str, "DEF456xy 789ghi ");
+  ASSERT_EQ(writer.get_chars_written(), 16ull);
+}


        


More information about the libc-commits mailing list