[libcxx-commits] [libc] [libcxx] [llvm] [libcxx][libc] Hand in Hand PoC with from_chars (PR #91651)

Fri Aug 16 15:27:57 PDT 2024

================
@@ -0,0 +1,337 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
+#define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
+
+// This header is in the shared LLVM-libc header library.
+#include "shared/str_to_float.h"
+
+#include <__assert>
+#include <__config>
+#include <cctype>
+#include <charconv>
+#include <concepts>
+#include <limits>
+#include <type_traits>
+
+// Included for the _Floating_type_traits class
+#include "to_chars_floating_point.h"
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+// Parses an infinity string.
+// Valid strings are case insentitive and contain INF or INFINITY.
+//
+// - __first is the first argument to std::from_chars. When the string is invalid
+//   this value is returned as ptr in the result.
+// - __last is the last argument of std::from_chars.
+// - __value is the value argument of std::from_chars,
+// - __ptr is the current position is the input string. This is points beyond
+//   the initial I character.
+// - __negative whether a valid string represents -inf or +inf.
+template <floating_point _Fp>
+from_chars_result __from_chars_floating_point_inf(
+    const char* const __first, const char* __last, _Fp& __value, const char* __ptr, bool __negative) {
+  if (__last - __ptr < 2) [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  if (std::tolower(__ptr[0]) != 'n' || std::tolower(__ptr[1]) != 'f') [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  __ptr += 2;
+
+  // At this point the result is valid and contains INF.
+  // When the remaining part contains INITY this will be consumed. Otherwise
+  // only INF is consumed. For example INFINITZ will consume INF and ignore
+  // INITZ.
+
+  if (__last - __ptr >= 5              //
+      && std::tolower(__ptr[0]) == 'i' //
+      && std::tolower(__ptr[1]) == 'n' //
+      && std::tolower(__ptr[2]) == 'i' //
+      && std::tolower(__ptr[3]) == 't' //
+      && std::tolower(__ptr[4]) == 'y')
+    __ptr += 5;
+
+  if constexpr (numeric_limits<_Fp>::has_infinity) {
+    if (__negative)
+      __value = -std::numeric_limits<_Fp>::infinity();
+    else
+      __value = std::numeric_limits<_Fp>::infinity();
+
+    return {__ptr, std::errc{}};
+  } else {
+    return {__ptr, errc::result_out_of_range};
+  }
+}
+
+// Parses a nan string.
+// Valid strings are case insentitive and contain INF or INFINITY.
+//
+// - __first is the first argument to std::from_chars. When the string is invalid
+//   this value is returned as ptr in the result.
+// - __last is the last argument of std::from_chars.
+// - __value is the value argument of std::from_chars,
+// - __ptr is the current position is the input string. This is points beyond
+//   the initial N character.
+// - __negative whether a valid string represents -nan or +nan.
+template <floating_point _Fp>
+from_chars_result __from_chars_floating_point_nan(
+    const char* const __first, const char* __last, _Fp& __value, const char* __ptr, bool __negative) {
+  if (__last - __ptr < 2) [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  if (std::tolower(__ptr[0]) != 'a' || std::tolower(__ptr[1]) != 'n') [[unlikely]]
+    return {__first, errc::invalid_argument};
+
+  __ptr += 2;
+
+  // At this point the result is valid and contains NAN. When the remaining
+  // part contains ( n-char-sequence_opt ) this will be consumed. Otherwise
+  // only NAN is consumed. For example NAN(abcd will consume NAN and ignore
+  // (abcd.
+  if (__last - __ptr >= 2 && __ptr[0] == '(') {
+    size_t __offset = 1;
+    do {
+      if (__ptr[__offset] == ')') {
+        __ptr += __offset + 1;
+        break;
+      }
+      if (__ptr[__offset] != '_' && !std::isalnum(__ptr[__offset]))
+        break;
+      ++__offset;
+    } while (__ptr + __offset != __last);
+  }
+
+  if (__negative)
+    __value = -std::numeric_limits<_Fp>::quiet_NaN();
+  else
+    __value = std::numeric_limits<_Fp>::quiet_NaN();
+
+  return {__ptr, std::errc{}};
+}
+
+template <floating_point _Fp>
+from_chars_result __from_chars_floating_point_decimal(
+    const char* const __first,
+    const char* __last,
+    _Fp& __value,
+    chars_format __fmt,
+    const char* __ptr,
+    bool __negative) {
+  using _Traits    = _Floating_type_traits<_Fp>;
+  using _Uint_type = typename _Traits::_Uint_type;
+
+  const char* src  = __ptr; // rename to match the libc code copied for this section.
+  ptrdiff_t length = __last - src;
+  _LIBCPP_ASSERT_INTERNAL(length > 0, "");
+
+  _Uint_type mantissa            = 0;
+  int exponent                   = 0;
+  bool truncated                 = false;
+  bool seen_digit                = false;
+  bool has_valid_exponent        = false;
+  bool after_decimal             = false;
+  size_t index                   = 0;
+  const size_t BASE              = 10;
+  constexpr char EXPONENT_MARKER = 'e';
+  constexpr char DECIMAL_POINT   = '.';
+
+  // The loop fills the mantissa with as many digits as it can hold
+  const _Uint_type bitstype_max_div_by_base = numeric_limits<_Uint_type>::max() / BASE;
+
+  while (index < static_cast<size_t>(length)) {
+    if (LIBC_NAMESPACE::internal::isdigit(src[index])) {
----------------
michaelrj-google wrote:

I've moved these functions to `std::isdigit` for now, but checking the standard from_chars should be locale independent: https://eel.is/c++draft/charconv.from.chars. Is there a no-locale version of isdigit available inside libc++?

https://github.com/llvm/llvm-project/pull/91651