[libc-commits] [libc] [libc]: Implement strfrom* functions and utils (PR #85438)

Vinayak Dev via libc-commits libc-commits at lists.llvm.org
Fri Mar 15 10:40:32 PDT 2024


https://github.com/vinayakdsci created https://github.com/llvm/llvm-project/pull/85438

Fixes #84244.

Still a Work-In-Progress

>From d4046e89f15478b942cd5b78c863b7eff5aaed7c Mon Sep 17 00:00:00 2001
From: Vinayak Dev <vinayakdev.sci at gmail.com>
Date: Fri, 15 Mar 2024 23:00:57 +0530
Subject: [PATCH] [libc]: Implement strfrom* functions and utils

---
 libc/config/linux/x86_64/entrypoints.txt |   1 +
 libc/spec/stdc.td                        |   2 +
 libc/src/stdlib/CMakeLists.txt           |  21 ++++
 libc/src/stdlib/str_from_util.h          | 132 +++++++++++++++++++++++
 libc/src/stdlib/strfromf.cpp             |  44 ++++++++
 libc/src/stdlib/strfromf.h               |  21 ++++
 libc/test/src/stdlib/CMakeLists.txt      |  11 +-
 libc/test/src/stdlib/strfromf_test.cpp   |  94 ++++++++++++++++
 8 files changed, 325 insertions(+), 1 deletion(-)
 create mode 100644 libc/src/stdlib/str_from_util.h
 create mode 100644 libc/src/stdlib/strfromf.cpp
 create mode 100644 libc/src/stdlib/strfromf.h
 create mode 100644 libc/test/src/stdlib/strfromf_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index a8b416aa9a0cda..80eece5cbaf862 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -180,6 +180,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdlib.qsort_r
     libc.src.stdlib.rand
     libc.src.stdlib.srand
+    libc.src.stdlib.strfromf
     libc.src.stdlib.strtod
     libc.src.stdlib.strtof
     libc.src.stdlib.strtol
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 2bc9bc8b9b1a6f..489a5e88233ff0 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -946,6 +946,8 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"rand", RetValSpec<IntType>, [ArgSpec<VoidType>]>,
           FunctionSpec<"srand", RetValSpec<VoidType>, [ArgSpec<UnsignedIntType>]>,
 
+          FunctionSpec<"strfromf", RetValSpec<IntType>, [ArgSpec<CharRestrictedPtr>, ArgSpec<SizeTType>, ArgSpec<ConstCharRestrictedPtr>, ArgSpec<FloatType>]>,
+
           FunctionSpec<"strtof", RetValSpec<FloatType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>]>,
           FunctionSpec<"strtod", RetValSpec<DoubleType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>]>,
           FunctionSpec<"strtold", RetValSpec<LongDoubleType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>]>,
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index bd0bcffe0045d1..1d9da1afbad851 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -52,6 +52,27 @@ add_entrypoint_object(
     libc.config.linux.app_h
 )
 
+add_entrypoint_object(
+  strfromf
+  SRCS
+    strfromf.cpp
+  HDRS
+    strfromf.h
+  DEPENDS
+    .str_from_util # NOTE(review): defined below; confirm the order is fine
+)
+# Header-only format parsing and dispatch helpers included by strfromf.cpp.
+add_header_library(
+  str_from_util
+  HDRS
+    str_from_util.h
+  DEPENDS
+    libc.src.stdio.printf_core.converter_atlas
+    libc.src.stdio.printf_core.core_structs
+    libc.src.stdio.printf_core.writer
+    libc.src.__support.str_to_integer
+)
+
 add_entrypoint_object(
   strtof
   SRCS
diff --git a/libc/src/stdlib/str_from_util.h b/libc/src/stdlib/str_from_util.h
new file mode 100644
index 00000000000000..33597d1ea77a6f
--- /dev/null
+++ b/libc/src/stdlib/str_from_util.h
@@ -0,0 +1,132 @@
+//===-- Implementation header for strfromx() utilities --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// According to the C23 standard, any input character sequences except a
+// precision specifier and the usual floating point formats, namely
+// %{a,A,e,E,f,F,g,G}, are not allowed and any code that does otherwise results
+// in undefined behaviour; which in this case is that the buffer string is
+// simply populated with the format string. The case of the value of buffer size
+// being 0 or the input being NULL should be handled in the calling function
+// (strfromf, strfromd, strfroml) itself.
+
+#ifndef LLVM_LIBC_SRC_STDLIB_STRFROM_UTIL_H
+#define LLVM_LIBC_SRC_STDLIB_STRFROM_UTIL_H
+
+#include "src/__support/str_to_integer.h"
+#include "src/stdio/printf_core/converter_atlas.h"
+#include "src/stdio/printf_core/core_structs.h"
+#include "src/stdio/printf_core/writer.h"
+
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE::internal {
+// type_of<T>::type is T itself unless one of the specializations below applies.
+template <typename T> struct type_of { using type = T; };
+// float, double and long double map to the StorageType of their FPBits.
+template <> struct type_of<float> {
+  using type = fputil::FPBits<float>::StorageType;
+};
+template <> struct type_of<double> {
+  using type = fputil::FPBits<double>::StorageType;
+};
+template <> struct type_of<long double> {
+  using type = fputil::FPBits<long double>::StorageType;
+};
+// Convenience alias for type_of<T>::type.
+template <typename T> using type_of_v = typename type_of<T>::type;
+// Parses the single "%[.digits]<conv>" section expected at the start of format.
+template <typename T>
+printf_core::FormatSection parse_format_string(const char *__restrict format,
+                                               T fp) {
+  printf_core::FormatSection section;
+  size_t cur_pos = 0; // index of the next unread character of format
+
+  if (format[cur_pos] == '%') {
+    section.has_conv = true;
+    ++cur_pos;
+
+    // Precision stays -1 when no '.' follows; a '.' without digits gives 0.
+    section.precision = -1;
+    if (format[cur_pos] == '.') {
+      ++cur_pos;
+      section.precision = 0;
+
+      // Only literal digits are accepted here: the standard does not allow the
+      // '*' (asterisk) operator for the strfromx() functions.
+      if (internal::isdigit(format[cur_pos])) {
+        auto result = internal::strtointeger<int>(format + cur_pos, 10);
+        section.precision += result.value;
+        cur_pos += result.parsed_len;
+      }
+    }
+
+    section.conv_name = format[cur_pos];
+    switch (format[cur_pos]) {
+    case '%':
+      section.has_conv = true;
+      break;
+    case 'a':
+    case 'A':
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+      section.conv_val_raw = cpp::bit_cast<type_of_v<T>>(fp);
+      break;
+    default:
+      // Invalid specifier: treat the entire format string as raw text.
+      section.has_conv = false;
+      while (format[cur_pos] != '\0')
+        ++cur_pos;
+      break;
+    }
+    // Step past the conversion character so that raw_string includes it.
+    if (format[cur_pos] != '\0')
+      ++cur_pos;
+  } else {
+    section.has_conv = false;
+    // No leading '%': only one section is parsed, so the rest is raw text.
+    while (format[cur_pos] != '\0')
+      ++cur_pos;
+  }
+  // raw_string covers every character consumed above.
+  section.raw_string = {format, cur_pos};
+  return section;
+}
+
+// Writes one parsed section: its raw text if it holds no conversion, otherwise
+// the matching float conversion. Marked inline as it is defined in a header.
+inline int convert(const printf_core::FormatSection &section,
+                   printf_core::Writer *writer) {
+  if (!section.has_conv)
+    return writer->write(section.raw_string);
+  switch (section.conv_name) {
+  case '%':
+    return writer->write("%");
+  case 'f':
+  case 'F':
+    return printf_core::convert_float_decimal(writer, section);
+  case 'e':
+  case 'E':
+    return printf_core::convert_float_dec_exp(writer, section);
+  case 'a':
+  case 'A':
+    return printf_core::convert_float_hex_exp(writer, section);
+  case 'g':
+  case 'G':
+    return printf_core::convert_float_dec_auto(writer, section);
+  default:
+    return writer->write(section.raw_string);
+  }
+}
+
+} // namespace LIBC_NAMESPACE::internal
+
+#endif // LLVM_LIBC_SRC_STDLIB_STRFROM_UTIL_H
diff --git a/libc/src/stdlib/strfromf.cpp b/libc/src/stdlib/strfromf.cpp
new file mode 100644
index 00000000000000..fc626e115c57a8
--- /dev/null
+++ b/libc/src/stdlib/strfromf.cpp
@@ -0,0 +1,44 @@
+//===-- Implementation of strfromf ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strfromf.h"
+#include "src/stdlib/str_from_util.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(int, strfromf,
+                   (char *__restrict s, size_t n, const char *__restrict format,
+                    float fp)) {
+  // NOTE(review): C23 allows s to be null when n == 0 and still expects the
+  // would-be length as the result; this early return reports 0 instead.
+  if (!s)
+    return 0;
+
+  // The value is widened to double before parsing; parse_format_string stores
+  // the raw bits of whatever floating-point type it is handed.
+  printf_core::FormatSection section =
+      internal::parse_format_string(format, static_cast<double>(fp));
+  // Hold back one byte of the n-byte buffer for the terminating null.
+  printf_core::WriteBuffer wb(s, (n > 0 ? n - 1 : 0));
+  printf_core::Writer writer(&wb);
+
+  // convert() already emits raw_string itself when no conversion was parsed.
+  int result = internal::convert(section, &writer);
+  if (result < 0)
+    return result;
+
+  if (n > 0)
+    wb.buff[wb.buff_cur] = '\0';
+
+  return writer.get_chars_written();
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/strfromf.h b/libc/src/stdlib/strfromf.h
new file mode 100644
index 00000000000000..caaeb072da2730
--- /dev/null
+++ b/libc/src/stdlib/strfromf.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for strfromf ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_STRFROMF_H
+#define LLVM_LIBC_SRC_STDLIB_STRFROMF_H
+
+#include <stddef.h>
+
+namespace LIBC_NAMESPACE {
+// Formats fp according to format into s (capacity n); see strfromf.cpp.
+int strfromf(char *__restrict s, size_t n, const char *__restrict format,
+             float fp);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_STDLIB_STRFROMF_H
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 5488a61c4ef187..cb42bc56f51c5f 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -168,6 +168,16 @@ add_libc_test(
     .strtol_test_support
 )
 
+add_libc_test(
+  strfromf_test
+  SUITE
+    libc-stdlib-tests
+  SRCS
+    strfromf_test.cpp
+  DEPENDS
+    libc.src.stdlib.strfromf
+)
+
 add_libc_test(
   abs_test
   SUITE
@@ -259,7 +269,6 @@ add_libc_test(
     libc.src.stdlib.qsort
 )
 
-
 add_libc_test(
   qsort_r_test
   SUITE
diff --git a/libc/test/src/stdlib/strfromf_test.cpp b/libc/test/src/stdlib/strfromf_test.cpp
new file mode 100644
index 00000000000000..b8311635613fe8
--- /dev/null
+++ b/libc/test/src/stdlib/strfromf_test.cpp
@@ -0,0 +1,94 @@
+//===-- Unittests for strfromf --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strfromf.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcStrfromfTest, DecimalFloatFormat) {
+  char buff[100];
+  int written;
+
+  written = LIBC_NAMESPACE::strfromf(buff, 16, "%f", 1.0);
+  EXPECT_EQ(written, 8);
+  ASSERT_STREQ(buff, "1.000000");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%f", 1234567890.0);
+  EXPECT_EQ(written, 17);
+  ASSERT_STREQ(buff, "1234567936.000000");
+  // n == 5 keeps only 4 characters, but the full length is still returned.
+  written = LIBC_NAMESPACE::strfromf(buff, 5, "%f", 1234567890.0);
+  EXPECT_EQ(written, 17);
+  ASSERT_STREQ(buff, "1234");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 67, "%.3f", 1.0);
+  EXPECT_EQ(written, 5);
+  ASSERT_STREQ(buff, "1.000");
+  // A field width is not a valid strfrom* format, so the format is echoed.
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%1f", 1234567890.0);
+  EXPECT_EQ(written, 3);
+  ASSERT_STREQ(buff, "%1f");
+  // A null destination makes strfromf return 0 before any formatting.
+  written = LIBC_NAMESPACE::strfromf(nullptr, 0, "%f", 1.0);
+  EXPECT_EQ(written, 0);
+}
+
+TEST(LlvmLibcStrfromfTest, HexExpFloatFormat) {
+  char buff[100];
+  int written;
+  // With n == 0 nothing is stored, yet the would-be length is still returned.
+  written = LIBC_NAMESPACE::strfromf(buff, 0, "%a", 1234567890.0);
+  EXPECT_EQ(written, 14);
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%a", 1234567890.0);
+  EXPECT_EQ(written, 14);
+  ASSERT_STREQ(buff, "0x1.26580cp+30");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%A", 1234567890.0);
+  EXPECT_EQ(written, 14);
+  ASSERT_STREQ(buff, "0X1.26580CP+30");
+}
+
+TEST(LlvmLibcStrfromfTest, DecimalExpFloatFormat) {
+  char buff[100];
+  int written;
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%.9e", 1234567890.0);
+  EXPECT_EQ(written, 15);
+  ASSERT_STREQ(buff, "1.234567936e+09");
+  // 'E' yields the same digits with an upper-case exponent marker.
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%.9E", 1234567890.0);
+  EXPECT_EQ(written, 15);
+  ASSERT_STREQ(buff, "1.234567936E+09");
+}
+
+TEST(LlvmLibcStrfromfTest, AutoDecimalFloatFormat) {
+  char buff[100];
+  int written;
+  // Nine significant digits in exponent form: 14 characters in total.
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%.9g", 1234567890.0);
+  EXPECT_EQ(written, 14);
+  ASSERT_STREQ(buff, "1.23456794e+09");
+
+  written = LIBC_NAMESPACE::strfromf(buff, 20, "%.9G", 1234567890.0);
+  EXPECT_EQ(written, 14);
+  ASSERT_STREQ(buff, "1.23456794E+09");
+}
+
+TEST(LlvmLibcStrfromfTest, ImproperFormatString) {
+  // A format without a leading '%' is copied to the buffer unchanged.
+  char buff[100];
+  int retval;
+  retval = LIBC_NAMESPACE::strfromf(
+      buff, 37, "A simple string with no conversions.", 1.0);
+  EXPECT_EQ(retval, 36);
+  ASSERT_STREQ(buff, "A simple string with no conversions.");
+  // Only the leading "%A" is converted; the text after it is not emitted.
+  retval = LIBC_NAMESPACE::strfromf(
+      buff, 37, "%A simple string with one conversion, should overwrite.", 1.0);
+  EXPECT_EQ(retval, 6);
+  ASSERT_STREQ(buff, "0X1P+0");
+}



More information about the libc-commits mailing list