[libc-commits] [libc] [libc] Add max length argument to decimal to float (PR #84091)

via libc-commits libc-commits at lists.llvm.org
Tue Mar 5 21:43:55 PST 2024


================
@@ -38,60 +42,75 @@ LIBC_INLINE int b36_char_to_int(char input) {
 
 // checks if the next 3 characters of the string pointer are the start of a
 // hexadecimal number. Does not advance the string pointer.
-LIBC_INLINE bool is_hex_start(const char *__restrict src) {
+LIBC_INLINE bool
+is_hex_start(const char *__restrict src,
+             size_t src_len = cpp::numeric_limits<size_t>::max()) {
+  if (src_len < 3)
+    return false;
   return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
          b36_char_to_int(*(src + 2)) < 16;
 }
 
+struct BaseAndLen {
+  int base;
+  size_t len;
+};
+
 // Takes the address of the string pointer and parses the base from the start of
 // it. This function will advance |src| to the first valid digit in the inferred
 // base.
-LIBC_INLINE int infer_base(const char *__restrict *__restrict src) {
+LIBC_INLINE BaseAndLen infer_base(const char *__restrict src, size_t src_len) {
   // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
   // sequence of the decimal digits and the letters a (or A) through f (or F)
   // with values 10 through 15 respectively." (C standard 6.4.4.1)
-  if (is_hex_start(*src)) {
-    (*src) += 2;
-    return 16;
+  if (is_hex_start(src, src_len)) {
+    return {16, 2};
   } // An octal number is defined as "the prefix 0 optionally followed by a
     // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
     // number that starts with 0, including just 0, is an octal number.
-  else if (**src == '0') {
-    return 8;
+  else if (src_len > 0 && src[0] == '0') {
+    return {8, 0};
   } // A decimal number is defined as beginning "with a nonzero digit and
     // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
   else {
-    return 10;
+    return {10, 0};
   }
 }
 
 // Takes a pointer to a string and the base to convert to. This function is used
 // as the backend for all of the string to int functions.
 template <class T>
-LIBC_INLINE StrToNumResult<T> strtointeger(const char *__restrict src,
-                                           int base) {
+LIBC_INLINE StrToNumResult<T>
+strtointeger(const char *__restrict src, int base,
+             const size_t src_len = cpp::numeric_limits<size_t>::max()) {
+  // TODO: Rewrite to support numbers longer than long long
   unsigned long long result = 0;
   bool is_number = false;
-  const char *original_src = src;
+  size_t src_cur = 0;
   int error_val = 0;
 
+  if (src_len == 0)
+    return {0, 0, 0};
+
   if (base < 0 || base == 1 || base > 36) {
     error_val = EINVAL;
     return {0, 0, error_val};
   }
 
-  src = first_non_whitespace(src);
+  src_cur = first_non_whitespace(src, src_len) - src;
 
   char result_sign = '+';
-  if (*src == '+' || *src == '-') {
-    result_sign = *src;
-    ++src;
+  if (src[src_cur] == '+' || src[src_cur] == '-') {
+    result_sign = src[src_cur];
+    ++src_cur;
   }
 
   if (base == 0) {
-    base = infer_base(&src);
-  } else if (base == 16 && is_hex_start(src)) {
-    src = src + 2;
+    auto base_and_len = infer_base(src + src_cur, src_len - src_cur);
+    base = base_and_len.base;
+    src_cur += base_and_len.len;
+  } else if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur)) {
+    src_cur = src_cur + 2;
   }
----------------
lntue wrote:

`is_hex_start` is quite cheap anyway, so you can probably restructure this a bit so that `infer_base` doesn't need to return `.len` and you won't need the temporary struct `BaseAndLen`, as follows:
```
if (base == 0)
  base = infer_base(...);
if (base == 16 && is_hex_start(...))
  src_cur += 2;
```

https://github.com/llvm/llvm-project/pull/84091


More information about the libc-commits mailing list