[libc-commits] [libc] [libc]: Implement strfrom* functions and utils (PR #85438)

Tue Mar 19 03:09:47 PDT 2024

https://github.com/vinayakdsci updated https://github.com/llvm/llvm-project/pull/85438

>From 453695de978135647bc35fd4c14e07f3e532632f Mon Sep 17 00:00:00 2001
From: Vinayak Dev <vinayakdev.sci at gmail.com>
Date: Fri, 15 Mar 2024 23:00:57 +0530
Subject: [PATCH] [libc]: Implement strfrom* functions and utils

---
 libc/config/linux/x86_64/entrypoints.txt |   1 +
 libc/spec/stdc.td                        |   2 +
 libc/src/stdlib/CMakeLists.txt           |  22 ++++
 libc/src/stdlib/str_from_util.h          | 138 +++++++++++++++++++++++
 libc/src/stdlib/strfromf.cpp             |  42 +++++++
 libc/src/stdlib/strfromf.h               |  21 ++++
 libc/test/src/stdlib/CMakeLists.txt      |  11 +-
 libc/test/src/stdlib/strfromf_test.cpp   | 107 ++++++++++++++++++
 8 files changed, 343 insertions(+), 1 deletion(-)
 create mode 100644 libc/src/stdlib/str_from_util.h
 create mode 100644 libc/src/stdlib/strfromf.cpp
 create mode 100644 libc/src/stdlib/strfromf.h
 create mode 100644 libc/test/src/stdlib/strfromf_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index f81d334e9e788d..73f2b2ea780887 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -180,6 +180,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdlib.qsort_r
     libc.src.stdlib.rand
     libc.src.stdlib.srand
+    libc.src.stdlib.strfromf
     libc.src.stdlib.strtod
     libc.src.stdlib.strtof
     libc.src.stdlib.strtol
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 84d28cc3350304..920036adfed5c1 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -956,6 +956,8 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"rand", RetValSpec<IntType>, [ArgSpec<VoidType>]>,
           FunctionSpec<"srand", RetValSpec<VoidType>, [ArgSpec<UnsignedIntType>]>,
 
+          FunctionSpec<"strfromf", RetValSpec<IntType>, [ArgSpec<CharRestrictedPtr>, ArgSpec<SizeTType>, ArgSpec<ConstCharRestrictedPtr>, ArgSpec<FloatType>]>,
+
           FunctionSpec<"strtof", RetValSpec<FloatType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>]>,
           FunctionSpec<"strtod", RetValSpec<DoubleType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>]>,
           FunctionSpec<"strtold", RetValSpec<LongDoubleType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>]>,
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index bd0bcffe0045d1..22f7f990fb08a8 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -52,6 +52,28 @@ add_entrypoint_object(
     libc.config.linux.app_h
 )
 
+add_entrypoint_object(
+  strfromf
+  SRCS
+    strfromf.cpp
+  HDRS
+    strfromf.h
+  DEPENDS
+    .str_from_util
+)
+
+add_header_library(
+  str_from_util
+  HDRS
+    str_from_util.h
+  DEPENDS
+    libc.src.stdio.printf_core.converter
+    libc.src.stdio.printf_core.core_structs
+    libc.src.stdio.printf_core.writer
+    libc.src.__support.str_to_integer
+    libc.src.__support.CPP.type_traits
+)
+
 add_entrypoint_object(
   strtof
   SRCS
diff --git a/libc/src/stdlib/str_from_util.h b/libc/src/stdlib/str_from_util.h
new file mode 100644
index 00000000000000..c4c1c0a0ba4ef0
--- /dev/null
+++ b/libc/src/stdlib/str_from_util.h
@@ -0,0 +1,138 @@
+//===-- Implementation header for strfromx() utilitites -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// According to the C23 standard, any input character sequences except a
+// precision specifier and the usual floating point formats, namely
+// %{a,A,e,E,f,F,g,G}, are not allowed and any code that does otherwise results
+// in undefined behaviour(including use of a '%%' conversion specifier); which
+// in this case is that the buffer string is simply populated with the format
+// string. The case of the input being NULL should be handled in the calling
+// function (strfromf, strfromd, strfroml) itself.
+
+#ifndef LLVM_LIBC_SRC_STDLIB_STRFROM_UTIL_H
+#define LLVM_LIBC_SRC_STDLIB_STRFROM_UTIL_H
+
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/str_to_integer.h"
+#include "src/stdio/printf_core/converter_atlas.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE::internal {
+
+template <typename T>
+using storage_type = typename fputil::FPBits<T>::StorageType;
+
+template <typename T>
+printf_core::FormatSection parse_format_string(const char *__restrict format,
+                                               T fp) {
+  printf_core::FormatSection section;
+  size_t cur_pos = 0;
+
+  // There is no typed conversion function to convert single precision float
+  // to hex exponential format, and the function convert_float_hex_exp()
+  // requires a double or long double value to work correctly.
+  // To work around this, we convert fp to double if it is single precision, and
+  // then use that double precision value in the %{A, a} conversion specifiers.
+  [[maybe_unused]] double new_fp;
+  bool t_is_single_prec_type = cpp::is_same<T, float>::value;
+  if (t_is_single_prec_type)
+    new_fp = (double)fp;
+
+  if (format[cur_pos] == '%') {
+    section.has_conv = true;
+    ++cur_pos;
+
+    // handle precision
+    section.precision = -1;
+    if (format[cur_pos] == '.') {
+      ++cur_pos;
+      section.precision = 0;
+
+      // The standard does not allow the '*' (asterisk) operator for strfromx()
+      // functions
+      if (internal::isdigit(format[cur_pos])) {
+        auto result = internal::strtointeger<int>(format + cur_pos, 10);
+        section.precision += result.value;
+        cur_pos += result.parsed_len;
+      }
+    }
+
+    section.conv_name = format[cur_pos];
+    switch (format[cur_pos]) {
+    case 'a':
+    case 'A':
+      if (t_is_single_prec_type)
+        section.conv_val_raw = cpp::bit_cast<storage_type<double>>(new_fp);
+      else
+        section.conv_val_raw = cpp::bit_cast<storage_type<T>>(fp);
+      break;
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+      section.conv_val_raw = cpp::bit_cast<storage_type<T>>(fp);
+      break;
+    default:
+      section.has_conv = false;
+      while (format[cur_pos] != '\0')
+        ++cur_pos;
+      break;
+    }
+
+    if (format[cur_pos] != '\0')
+      ++cur_pos;
+  } else {
+    section.has_conv = false;
+    // We are looking for exactly one section, so no more '%'
+    while (format[cur_pos] != '\0')
+      ++cur_pos;
+  }
+
+  section.raw_string = {format, cur_pos};
+  return section;
+}
+
+template <typename T>
+int strfromfloat_convert(printf_core::Writer *writer,
+                         const printf_core::FormatSection &section) {
+  if (!section.has_conv)
+    return writer->write(section.raw_string);
+
+  auto res = static_cast<storage_type<T>>(section.conv_val_raw);
+
+  fputil::FPBits<T> strfromfloat_bits(res);
+  if (strfromfloat_bits.is_inf_or_nan())
+    return convert_inf_nan(writer, section);
+
+  switch (section.conv_name) {
+  case 'f':
+  case 'F':
+    return convert_float_decimal_typed(writer, section, strfromfloat_bits);
+  case 'e':
+  case 'E':
+    return convert_float_dec_exp_typed(writer, section, strfromfloat_bits);
+  case 'a':
+  case 'A':
+    return convert_float_hex_exp(writer, section);
+  case 'g':
+  case 'G':
+    return convert_float_dec_auto_typed(writer, section, strfromfloat_bits);
+  default:
+    return writer->write(section.raw_string);
+  }
+  return -1;
+}
+
+} // namespace LIBC_NAMESPACE::internal
+
+#endif // LLVM_LIBC_SRC_STDLIB_STRFROM_UTIL_H
diff --git a/libc/src/stdlib/strfromf.cpp b/libc/src/stdlib/strfromf.cpp
new file mode 100644
index 00000000000000..40eff87eb454c5
--- /dev/null
+++ b/libc/src/stdlib/strfromf.cpp
@@ -0,0 +1,42 @@
+//===-- Implementation of strfromf ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strfromf.h"
+#include "src/stdlib/str_from_util.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, strfromf,
+                   (char *__restrict s, size_t n, const char *__restrict format,
+                    float fp)) {
+  LIBC_ASSERT(s != nullptr);
+
+  printf_core::FormatSection section =
+      internal::parse_format_string(format, fp);
+  printf_core::WriteBuffer wb(s, (n > 0 ? n - 1 : 0));
+  printf_core::Writer writer(&wb);
+
+  int result = 0;
+  if (section.has_conv)
+    result = internal::strfromfloat_convert<float>(&writer, section);
+  else
+    result = writer.write(section.raw_string);
+
+  if (result < 0)
+    return result;
+
+  if (n > 0)
+    wb.buff[wb.buff_cur] = '\0';
+
+  return writer.get_chars_written();
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/strfromf.h b/libc/src/stdlib/strfromf.h
new file mode 100644
index 00000000000000..b551a58af05a3e
--- /dev/null
+++ b/libc/src/stdlib/strfromf.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for strfromf ------------------------*- C++--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_STRFROMF_H
+#define LLVM_LIBC_SRC_STDLIB_STRFROMF_H
+
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE {
+
+int strfromf(char *__restrict s, size_t n, const char *__restrict format,
+             float fp);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STDLIB_STRTOF_H
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 5488a61c4ef187..cb42bc56f51c5f 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -168,6 +168,16 @@ add_libc_test(
     .strtol_test_support
 )
 
+add_libc_test(
+  strfromf_test
+  SUITE
+    libc-stdlib-tests
+  SRCS
+    strfromf_test.cpp
+  DEPENDS
+    libc.src.stdlib.strfromf
+)
+
 add_libc_test(
   abs_test
   SUITE
@@ -259,7 +269,6 @@ add_libc_test(
     libc.src.stdlib.qsort
 )
 
-
 add_libc_test(
   qsort_r_test
   SUITE
diff --git a/libc/test/src/stdlib/strfromf_test.cpp b/libc/test/src/stdlib/strfromf_test.cpp
new file mode 100644
index 00000000000000..c5489f5f3af2ac
--- /dev/null
+++ b/libc/test/src/stdlib/strfromf_test.cpp
@@ -0,0 +1,107 @@
+//===-- Unittests for strfromf --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strfromf.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcStrfromfTest, DecimalFloatFormat) {
+  char buff[100];
+  int written;
+
+  written = LIBC_NAMESPACE::strfromf(buff, 16, "%f", 1.0);
+  EXPECT_EQ(written, 8);
+  ASSERT_STREQ(buff, "1.000000");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%f", 1234567890.0);
+  EXPECT_EQ(written, 17);
+  ASSERT_STREQ(buff, "1234567936.000000");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 5, "%f", 1234567890.0);
+  EXPECT_EQ(written, 17);
+  ASSERT_STREQ(buff, "1234");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 67, "%.3f", 1.0);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "1.000");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%1f", 1234567890.0);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "%1f");
+}
+
+TEST(LlvmLibcStrfromfTest, HexExpFloatFormat) {
+  char buff[100];
+  int written;
+
+  written = LIBC_NAMESPACE::strfromf(buff, 0, "%a", 1234567890.0);
+  EXPECT_EQ(written, 14);
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%a", 1234567890.0);
+  EXPECT_EQ(written, 14);
+  ASSERT_STREQ(buff, "0x1.26580cp+30");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%A", 1234567890.0);
+  EXPECT_EQ(written, 14);
+  ASSERT_STREQ(buff, "0X1.26580CP+30");
+}
+
+TEST(LlvmLibcStrfromfTest, DecimalExpFloatFormat) {
+  char buff[100];
+  int written;
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%.9e", 1234567890.0);
+  EXPECT_EQ(written, 15);
+  ASSERT_STREQ(buff, "1.234567936e+09");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%.9E", 1234567890.0);
+  EXPECT_EQ(written, 15);
+  ASSERT_STREQ(buff, "1.234567936E+09");
+}
+
+TEST(LlvmLibcStrfromfTest, AutoDecimalFloatFormat) {
+  char buff[100];
+  int written;
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%.9g", 1234567890.0);
+  EXPECT_EQ(written, 14);
+  ASSERT_STREQ(buff, "1.23456794e+09");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%.9G", 1234567890.0);
+  EXPECT_EQ(written, 14);
+  ASSERT_STREQ(buff, "1.23456794E+09");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 0, "%G", 1.0);
+  EXPECT_EQ(written, 1);
+}
+
+TEST(LlvmLibcStrfromfTest, ImproperFormatString) {
+
+  char buff[100];
+  int retval;
+  retval = LIBC_NAMESPACE::strfromf(
+      buff, 37, "A simple string with no conversions.", 1.0);
+  EXPECT_EQ(retval, 36);
+  ASSERT_STREQ(buff, "A simple string with no conversions.");
+
+  retval = LIBC_NAMESPACE::strfromf(
+      buff, 37, "%A simple string with one conversion, should overwrite.", 1.0);
+  EXPECT_EQ(retval, 6);
+  ASSERT_STREQ(buff, "0X1P+0");
+
+  retval = LIBC_NAMESPACE::strfromf(buff, 74,
+                                    "A simple string with one conversion in %A "
+                                    "between, writes string as it is",
+                                    1.0);
+  EXPECT_EQ(retval, 73);
+  ASSERT_STREQ(buff, "A simple string with one conversion in %A between, "
+                     "writes string as it is");
+
+  retval = LIBC_NAMESPACE::strfromf(buff, 36,
+                                    "A simple string with one conversion", 1.0);
+  EXPECT_EQ(retval, 35);
+  ASSERT_STREQ(buff, "A simple string with one conversion");
+}