[libc-commits] [libc] afa764c - [libc] add scanf current position conversion

Wed Jan 25 15:29:30 PST 2023

Author: Michael Jones
Date: 2023-01-25T15:29:22-08:00
New Revision: afa764c9a6d68ab6de94a5d007e0b468e65ae946

URL: https://github.com/llvm/llvm-project/commit/afa764c9a6d68ab6de94a5d007e0b468e65ae946
DIFF: https://github.com/llvm/llvm-project/commit/afa764c9a6d68ab6de94a5d007e0b468e65ae946.diff

LOG: [libc] add scanf current position conversion

To add the current position (%n) conversion, some reorganization needed
to be done. The "write a number to this pointer using the length
modifier" utilities and a couple other shared parsing functions have
been moved into converter_utils.h. This made implementing
current_pos_converter very simple.

Reviewed By: lntue

Differential Revision: https://reviews.llvm.org/D142495

Added: 
    libc/src/stdio/scanf_core/converter_utils.h
    libc/src/stdio/scanf_core/current_pos_converter.h

Modified: 
    libc/src/stdio/scanf_core/CMakeLists.txt
    libc/src/stdio/scanf_core/converter.cpp
    libc/src/stdio/scanf_core/float_converter.cpp
    libc/src/stdio/scanf_core/int_converter.cpp
    libc/src/stdio/scanf_core/scanf_main.cpp
    libc/test/src/stdio/sscanf_test.cpp

Removed: 
    


################################################################################
diff  --git a/libc/src/stdio/scanf_core/CMakeLists.txt b/libc/src/stdio/scanf_core/CMakeLists.txt
index 54fa40d5363b4..d0eaa1a44012e 100644

--- a/libc/src/stdio/scanf_core/CMakeLists.txt
+++ b/libc/src/stdio/scanf_core/CMakeLists.txt
@@ -82,19 +82,21 @@ add_object_library(
     float_converter.cpp
   HDRS
     converter.h
+    converter_utils.h
     int_converter.h
     string_converter.h
     float_converter.h
+    current_pos_converter.h
   DEPENDS
     .reader
     .core_structs
+    libc.src.__support.common
     libc.src.__support.ctype_utils
     libc.src.__support.CPP.bitset
     libc.src.__support.CPP.string_view
     libc.src.__support.CPP.limits
     libc.src.__support.char_vector
-    libc.include.errno
-    libc.src.errno.errno
+    libc.src.__support.str_to_float
 )
 
 add_object_library(

diff  --git a/libc/src/stdio/scanf_core/converter.cpp b/libc/src/stdio/scanf_core/converter.cpp
index c5a2932fb18f8..053e215b4d983 100644
--- a/libc/src/stdio/scanf_core/converter.cpp
+++ b/libc/src/stdio/scanf_core/converter.cpp
@@ -15,6 +15,7 @@
 #ifndef LLVM_LIBC_SCANF_DISABLE_FLOAT
 #include "src/stdio/scanf_core/float_converter.h"
 #endif // LLVM_LIBC_SCANF_DISABLE_FLOAT
+#include "src/stdio/scanf_core/current_pos_converter.h"
 #include "src/stdio/scanf_core/int_converter.h"
 #include "src/stdio/scanf_core/string_converter.h"
 
@@ -60,8 +61,8 @@ int convert(Reader *reader, const FormatSection &to_conv) {
       return ret_val;
     return convert_float(reader, to_conv);
 #endif // LLVM_LIBC_SCANF_DISABLE_FLOAT
-    //   case 'n':
-    //     return convert_write_int(reader, to_conv);
+  case 'n':
+    return convert_current_pos(reader, to_conv);
     //   case 'p':
     //     ret_val = raw_match(reader, " ");
     //     if (ret_val != READ_OK)

diff  --git a/libc/src/stdio/scanf_core/converter_utils.h b/libc/src/stdio/scanf_core/converter_utils.h
new file mode 100644
index 0000000000000..07ac9c7407ed3
--- /dev/null
+++ b/libc/src/stdio/scanf_core/converter_utils.h
@@ -0,0 +1,107 @@
+//===-- Format specifier converter for scanf -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H
+
+#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/str_to_float.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+LIBC_INLINE constexpr char to_lower(char a) { return a | 32; }
+
+LIBC_INLINE constexpr int b36_char_to_int(char input) {
+  if (internal::isdigit(input))
+    return input - '0';
+  if (internal::isalpha(input))
+    return to_lower(input) + 10 - 'a';
+  return 0;
+}
+
+LIBC_INLINE void write_int_with_length(uintmax_t output_val,
+                                       const FormatSection &to_conv) {
+  if ((to_conv.flags & NO_WRITE) != 0) {
+    return;
+  }
+  LengthModifier lm = to_conv.length_modifier;
+  void *output_ptr = to_conv.output_ptr;
+  switch (lm) {
+  case (LengthModifier::hh):
+    *reinterpret_cast<unsigned char *>(output_ptr) =
+        static_cast<unsigned char>(output_val);
+    break;
+  case (LengthModifier::h):
+    *reinterpret_cast<unsigned short *>(output_ptr) =
+        static_cast<unsigned short>(output_val);
+    break;
+  case (LengthModifier::NONE):
+    *reinterpret_cast<unsigned int *>(output_ptr) =
+        static_cast<unsigned int>(output_val);
+    break;
+  case (LengthModifier::l):
+    *reinterpret_cast<unsigned long *>(output_ptr) =
+        static_cast<unsigned long>(output_val);
+    break;
+  case (LengthModifier::ll):
+  case (LengthModifier::L):
+    *reinterpret_cast<unsigned long long *>(output_ptr) =
+        static_cast<unsigned long long>(output_val);
+    break;
+  case (LengthModifier::j):
+    *reinterpret_cast<uintmax_t *>(output_ptr) =
+        static_cast<uintmax_t>(output_val);
+    break;
+  case (LengthModifier::z):
+    *reinterpret_cast<size_t *>(output_ptr) = static_cast<size_t>(output_val);
+    break;
+  case (LengthModifier::t):
+    *reinterpret_cast<ptrdiff_t *>(output_ptr) =
+        static_cast<ptrdiff_t>(output_val);
+    break;
+  }
+}
+
+LIBC_INLINE void write_float_with_length(char *str,
+                                         const FormatSection &to_conv) {
+  if ((to_conv.flags & NO_WRITE) != 0) {
+    return;
+  }
+
+  void *output_ptr = to_conv.output_ptr;
+
+  LengthModifier lm = to_conv.length_modifier;
+  switch (lm) {
+  case (LengthModifier::l): {
+    auto value = internal::strtofloatingpoint<double>(str, nullptr);
+    *reinterpret_cast<double *>(output_ptr) = value;
+    break;
+  }
+  case (LengthModifier::L): {
+    auto value = internal::strtofloatingpoint<long double>(str, nullptr);
+    *reinterpret_cast<long double *>(output_ptr) = value;
+    break;
+  }
+  default: {
+    auto value = internal::strtofloatingpoint<float>(str, nullptr);
+    *reinterpret_cast<float *>(output_ptr) = value;
+    break;
+  }
+  }
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CONVERTER_UTILS_H

diff  --git a/libc/src/stdio/scanf_core/current_pos_converter.h b/libc/src/stdio/scanf_core/current_pos_converter.h
new file mode 100644
index 0000000000000..be4b6553b89dc
--- /dev/null
+++ b/libc/src/stdio/scanf_core/current_pos_converter.h
@@ -0,0 +1,31 @@
+//===-- Current position specifier converter for scanf ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H
+#define LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H
+
+#include "src/__support/common.h"
+#include "src/stdio/scanf_core/converter_utils.h"
+#include "src/stdio/scanf_core/core_structs.h"
+#include "src/stdio/scanf_core/reader.h"
+
+#include <stddef.h>
+
+namespace __llvm_libc {
+namespace scanf_core {
+
+LIBC_INLINE int convert_current_pos(Reader *reader,
+                                    const FormatSection &to_conv) {
+  write_int_with_length(reader->chars_read(), to_conv);
+  return READ_OK;
+}
+
+} // namespace scanf_core
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDIO_SCANF_CORE_CURRENT_POS_CONVERTER_H

diff  --git a/libc/src/stdio/scanf_core/float_converter.cpp b/libc/src/stdio/scanf_core/float_converter.cpp
index 1a0ce42863f35..5ffb9b09fcfc2 100644
--- a/libc/src/stdio/scanf_core/float_converter.cpp
+++ b/libc/src/stdio/scanf_core/float_converter.cpp
@@ -11,7 +11,7 @@
 #include "src/__support/CPP/limits.h"
 #include "src/__support/char_vector.h"
 #include "src/__support/ctype_utils.h"
-#include "src/__support/str_to_float.h"
+#include "src/stdio/scanf_core/converter_utils.h"
 #include "src/stdio/scanf_core/core_structs.h"
 #include "src/stdio/scanf_core/reader.h"
 
@@ -20,35 +20,6 @@
 namespace __llvm_libc {
 namespace scanf_core {
 
-constexpr char inline to_lower(char a) { return a | 32; }
-
-void write_with_length(char *str, const FormatSection &to_conv) {
-  if ((to_conv.flags & NO_WRITE) != 0) {
-    return;
-  }
-
-  void *output_ptr = to_conv.output_ptr;
-
-  LengthModifier lm = to_conv.length_modifier;
-  switch (lm) {
-  case (LengthModifier::l): {
-    auto value = internal::strtofloatingpoint<double>(str, nullptr);
-    *reinterpret_cast<double *>(output_ptr) = value;
-    break;
-  }
-  case (LengthModifier::L): {
-    auto value = internal::strtofloatingpoint<long double>(str, nullptr);
-    *reinterpret_cast<long double *>(output_ptr) = value;
-    break;
-  }
-  default: {
-    auto value = internal::strtofloatingpoint<float>(str, nullptr);
-    *reinterpret_cast<float *>(output_ptr) = value;
-    break;
-  }
-  }
-}
-
 // All of the floating point conversions are the same for scanf, every name will
 // accept every style.
 int convert_float(Reader *reader, const FormatSection &to_conv) {
@@ -96,7 +67,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
     }
 
     if (inf_index == 3 || inf_index == sizeof(inf_string) - 1) {
-      write_with_length(out_str.c_str(), to_conv);
+      write_float_with_length(out_str.c_str(), to_conv);
       return READ_OK;
     } else {
       return MATCHING_FAILURE;
@@ -119,7 +90,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
     }
 
     if (nan_index == sizeof(nan_string) - 1) {
-      write_with_length(out_str.c_str(), to_conv);
+      write_float_with_length(out_str.c_str(), to_conv);
       return READ_OK;
     } else {
       return MATCHING_FAILURE;
@@ -138,7 +109,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
     }
     // If we've hit the end, then this is "0", which is valid.
     if (out_str.length() == max_width) {
-      write_with_length(out_str.c_str(), to_conv);
+      write_float_with_length(out_str.c_str(), to_conv);
       return READ_OK;
     } else {
       cur_char = reader->getc();
@@ -154,7 +125,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
       // If we've hit the end here, we have "0x" which is a valid prefix to a
       // floating point number, and will be evaluated to 0.
       if (out_str.length() == max_width) {
-        write_with_length(out_str.c_str(), to_conv);
+        write_float_with_length(out_str.c_str(), to_conv);
         return READ_OK;
       } else {
         cur_char = reader->getc();
@@ -246,7 +217,7 @@ int convert_float(Reader *reader, const FormatSection &to_conv) {
   if (!is_number) {
     return MATCHING_FAILURE;
   }
-  write_with_length(out_str.c_str(), to_conv);
+  write_float_with_length(out_str.c_str(), to_conv);
 
   return READ_OK;
 }

diff  --git a/libc/src/stdio/scanf_core/int_converter.cpp b/libc/src/stdio/scanf_core/int_converter.cpp
index be88a01f942d7..ce23f5d3fed8d 100644
--- a/libc/src/stdio/scanf_core/int_converter.cpp
+++ b/libc/src/stdio/scanf_core/int_converter.cpp
@@ -10,6 +10,7 @@
 
 #include "src/__support/CPP/limits.h"
 #include "src/__support/ctype_utils.h"
+#include "src/stdio/scanf_core/converter_utils.h"
 #include "src/stdio/scanf_core/core_structs.h"
 #include "src/stdio/scanf_core/reader.h"
 
@@ -18,58 +19,6 @@
 namespace __llvm_libc {
 namespace scanf_core {
 
-constexpr char inline to_lower(char a) { return a | 32; }
-
-constexpr inline int b36_char_to_int(char input) {
-  if (internal::isdigit(input))
-    return input - '0';
-  if (internal::isalpha(input))
-    return to_lower(input) + 10 - 'a';
-  return 0;
-}
-
-void write_with_length(uintmax_t output_val, const FormatSection &to_conv) {
-  if ((to_conv.flags & NO_WRITE) != 0) {
-    return;
-  }
-  LengthModifier lm = to_conv.length_modifier;
-  void *output_ptr = to_conv.output_ptr;
-  switch (lm) {
-  case (LengthModifier::hh):
-    *reinterpret_cast<unsigned char *>(output_ptr) =
-        static_cast<unsigned char>(output_val);
-    break;
-  case (LengthModifier::h):
-    *reinterpret_cast<unsigned short *>(output_ptr) =
-        static_cast<unsigned short>(output_val);
-    break;
-  case (LengthModifier::NONE):
-    *reinterpret_cast<unsigned int *>(output_ptr) =
-        static_cast<unsigned int>(output_val);
-    break;
-  case (LengthModifier::l):
-    *reinterpret_cast<unsigned long *>(output_ptr) =
-        static_cast<unsigned long>(output_val);
-    break;
-  case (LengthModifier::ll):
-  case (LengthModifier::L):
-    *reinterpret_cast<unsigned long long *>(output_ptr) =
-        static_cast<unsigned long long>(output_val);
-    break;
-  case (LengthModifier::j):
-    *reinterpret_cast<uintmax_t *>(output_ptr) =
-        static_cast<uintmax_t>(output_val);
-    break;
-  case (LengthModifier::z):
-    *reinterpret_cast<size_t *>(output_ptr) = static_cast<size_t>(output_val);
-    break;
-  case (LengthModifier::t):
-    *reinterpret_cast<ptrdiff_t *>(output_ptr) =
-        static_cast<ptrdiff_t>(output_val);
-    break;
-  }
-}
-
 // This code is very similar to the code in __support/str_to_integer.h but is
 // not quite the same. Here is the list of differences and why they exist:
 //  1) This takes a reader and a format section instead of a char* and the base.
@@ -150,7 +99,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
     } else {
       // If the max width has been hit already, then the return value must be 0
       // since no actual digits of the number have been parsed yet.
-      write_with_length(0, to_conv);
+      write_int_with_length(0, to_conv);
       return MATCHING_FAILURE;
     }
   }
@@ -168,7 +117,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
         --max_width;
         cur_char = reader->getc();
       } else {
-        write_with_length(0, to_conv);
+        write_int_with_length(0, to_conv);
         return READ_OK;
       }
 
@@ -179,7 +128,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
           --max_width;
           cur_char = reader->getc();
         } else {
-          write_with_length(0, to_conv);
+          write_int_with_length(0, to_conv);
           return READ_OK;
         }
 
@@ -196,7 +145,7 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
         // If the first character isn't a valid digit, then there are no valid
         // digits at all. The number is 0.
         reader->ungetc(cur_char);
-        write_with_length(0, to_conv);
+        write_int_with_length(0, to_conv);
         return MATCHING_FAILURE;
       }
     }
@@ -249,12 +198,12 @@ int convert_int(Reader *reader, const FormatSection &to_conv) {
   reader->ungetc(cur_char);
 
   if (has_overflow) {
-    write_with_length(MAX, to_conv);
+    write_int_with_length(MAX, to_conv);
   } else {
     if (is_negative)
       result = -result;
 
-    write_with_length(result, to_conv);
+    write_int_with_length(result, to_conv);
   }
 
   if (!is_number)

diff  --git a/libc/src/stdio/scanf_core/scanf_main.cpp b/libc/src/stdio/scanf_core/scanf_main.cpp
index ed509eca4c66f..5a79d2e624ab0 100644
--- a/libc/src/stdio/scanf_core/scanf_main.cpp
+++ b/libc/src/stdio/scanf_core/scanf_main.cpp
@@ -29,7 +29,10 @@ int scanf_main(Reader *reader, const char *__restrict str,
        cur_section = parser.get_next_section()) {
     if (cur_section.has_conv) {
       ret_val = convert(reader, cur_section);
-      conversions += ret_val == READ_OK ? 1 : 0;
+      // The %n (current position) conversion doesn't increment the number of
+      // assignments.
+      if (cur_section.conv_name != 'n')
+        conversions += ret_val == READ_OK ? 1 : 0;
     } else {
       ret_val = raw_match(reader, cur_section.raw_string);
     }

diff  --git a/libc/test/src/stdio/sscanf_test.cpp b/libc/test/src/stdio/sscanf_test.cpp
index fc67593f57f0b..55671279e1b4d 100644
--- a/libc/test/src/stdio/sscanf_test.cpp
+++ b/libc/test/src/stdio/sscanf_test.cpp
@@ -59,6 +59,20 @@ TEST(LlvmLibcSScanfTest, IntConvSimple) {
   EXPECT_EQ(ret_val, 1);
   EXPECT_EQ(result, 345);
 
+  // 288 characters
+  ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000",
+                                "%d", &result);
+  EXPECT_EQ(ret_val, 1);
+  EXPECT_EQ(result, int(__llvm_libc::cpp::numeric_limits<intmax_t>::max()));
+
   ret_val = __llvm_libc::sscanf("Not an integer", "%d", &result);
   EXPECT_EQ(ret_val, 0);
 }
@@ -445,11 +459,6 @@ TEST(LlvmLibcSScanfTest, FloatConvComplexParsing) {
   EXPECT_FP_EQ(result, 1.2);
 }
 
-/*
-TODO:
-  Max width tests
-*/
-
 TEST(LlvmLibcSScanfTest, FloatConvMaxWidth) {
   int ret_val;
   float result = 0;
@@ -572,6 +581,54 @@ TEST(LlvmLibcSScanfTest, FloatConvNoWrite) {
   EXPECT_EQ(ret_val, 0);
 }
 
+TEST(LlvmLibcSScanfTest, CurPosCombined) {
+  int ret_val;
+  int result = -1;
+  char c_result = 0;
+
+  ret_val = __llvm_libc::sscanf("some text", "%n", &result);
+  // %n doesn't count as a conversion for the return value.
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, 0);
+
+  ret_val = __llvm_libc::sscanf("1234567890", "12345%n", &result);
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, 5);
+
+  ret_val = __llvm_libc::sscanf("1234567890", "12345%n", &result);
+  EXPECT_EQ(ret_val, 0);
+  EXPECT_EQ(result, 5);
+
+  // 288 characters
+  ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000",
+                                "%*d%hhn", &c_result);
+  EXPECT_EQ(ret_val, 1);
+  EXPECT_EQ(c_result, char(288)); // Overflow is handled by casting.
+
+  // 320 characters
+  ret_val = __llvm_libc::sscanf("10000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000"
+                                "00000000000000000000000000000000",
+                                "%*d%n", &result);
+  EXPECT_EQ(ret_val, 1);
+  EXPECT_EQ(result, 320);
+}
+
 TEST(LlvmLibcSScanfTest, CombinedConv) {
   int ret_val;
   int result = 0;