[libc-commits] [libc] b062d63 - [libc] add strtoll function and backend

Michael Jones via libc-commits libc-commits at lists.llvm.org
Wed Aug 11 16:37:29 PDT 2021


Author: Michael Jones
Date: 2021-08-11T23:37:23Z
New Revision: b062d639bb3a315f2b5f410e99b9c3ea10c530cc

URL: https://github.com/llvm/llvm-project/commit/b062d639bb3a315f2b5f410e99b9c3ea10c530cc
DIFF: https://github.com/llvm/llvm-project/commit/b062d639bb3a315f2b5f410e99b9c3ea10c530cc.diff

LOG: [libc] add strtoll function and backend

This change adds the strtoll function, but most of the implementation is
in the new file str_conv_utils.h since many of the other integer
conversion functions are implemented through what are effectively calls
to strtoll.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D107792

Added: 
    libc/src/__support/str_conv_utils.h
    libc/src/stdlib/strtoll.cpp
    libc/src/stdlib/strtoll.h
    libc/test/src/stdlib/strtoll_test.cpp

Modified: 
    libc/config/linux/x86_64/entrypoints.txt
    libc/spec/spec.td
    libc/spec/stdc.td
    libc/src/__support/CMakeLists.txt
    libc/src/stdlib/CMakeLists.txt
    libc/test/src/stdlib/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6e9d991d25320..90346b4b3461f 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -158,6 +158,7 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.stdlib.abs
     libc.src.stdlib.labs
     libc.src.stdlib.llabs
+    libc.src.stdlib.strtoll
 
     # signal.h entrypoints
     libc.src.signal.raise

diff  --git a/libc/spec/spec.td b/libc/spec/spec.td
index c78c0f053e06e..e49c759c1f790 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -62,6 +62,7 @@ def ConstVoidRestrictedPtr : ConstType<VoidRestrictedPtr>;
 def CharPtr : PtrType<CharType>;
 def ConstCharPtr : ConstType<CharPtr>;
 def CharRestrictedPtr : RestrictedPtrType<CharType>;
+def CharRestrictedPtrPtr : RestrictedPtrType<CharPtr>;
 def ConstCharRestrictedPtr : ConstType<CharRestrictedPtr>;
 
 def OnceFlagType : NamedType<"once_flag">;

diff  --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index c6621812258cc..eb0ffdfe2e9d2 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -478,6 +478,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"abs", RetValSpec<IntType>, [ArgSpec<IntType>]>,
           FunctionSpec<"labs", RetValSpec<LongType>, [ArgSpec<LongType>]>,
           FunctionSpec<"llabs", RetValSpec<LongLongType>, [ArgSpec<LongLongType>]>,
+          FunctionSpec<"strtoll", RetValSpec<LongLongType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>, ArgSpec<IntType>]>,
           FunctionSpec<"_Exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
       ]
   >;

diff  --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 5135632c377ac..dce6cb871ec56 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -12,6 +12,16 @@ add_header_library(
     ctype_utils.h
 )
 
+# Header-only helpers for string to integer conversion (str_conv_utils.h).
+# The header includes ctype_utils.h and assigns to errno, hence the
+# dependencies listed below.
+add_header_library(
+  str_conv_utils
+  HDRS
+    str_conv_utils.h
+  DEPENDS
+    .ctype_utils
+    libc.include.errno
+    libc.src.errno.__errno_location
+)
+
 add_header_library(
   integer_operations
   HDRS

diff  --git a/libc/src/__support/str_conv_utils.h b/libc/src/__support/str_conv_utils.h
new file mode 100644
index 0000000000000..6a83fa6799a77
--- /dev/null
+++ b/libc/src/__support/str_conv_utils.h
@@ -0,0 +1,111 @@
+//===-- String to integer conversion utils ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC_STDLIB_STDLIB_UTILS_H
+#define LIBC_SRC_STDLIB_STDLIB_UTILS_H
+
+#include "src/__support/ctype_utils.h"
+#include <errno.h>
+#include <limits.h>
+
+namespace __llvm_libc {
+namespace internal {
+
+// Skips over leading whitespace (as classified by internal::isspace) and
+// returns a pointer to the first character of src that is not whitespace. If
+// src has no leading whitespace the returned pointer is src itself.
+static inline const char *first_non_whitespace(const char *__restrict src) {
+  const char *cursor = src;
+  for (; internal::isspace(*cursor); ++cursor) {
+  }
+  return cursor;
+}
+
+// Converts one character into its value as a digit in bases up to 36:
+// '0'-'9' give 0-9 and letters of either case give 10-35. A character that is
+// neither a digit nor a letter yields 0, so callers that need to tell "zero"
+// from "not a digit" must check the character class themselves.
+static inline int b36_char_to_int(char input) {
+  if (isdigit(input))
+    return input - '0';
+  if (!isalpha(input))
+    return 0;
+  // Setting bit 5 (value 32) folds an ASCII uppercase letter to lowercase.
+  const int lowercase = input | 32;
+  return lowercase - 'a' + 10;
+}
+
+// Takes the address of the string pointer and infers the numeric base from the
+// prefix at the start of the string, advancing the string pointer past the
+// part of the prefix that was consumed:
+//   "0x" or "0X" followed by a hex digit -> 16, pointer moved past the "0x".
+//   any other leading '0'                -> 8, pointer moved past the '0'.
+//   anything else                        -> 10, pointer left unchanged.
+// A "0x" that is not followed by a hexadecimal digit is deliberately not
+// treated as a hex prefix: only the '0' forms a number there, so the 'x' is
+// left unconsumed and the caller stops parsing in front of it.
+static inline int infer_base(const char *__restrict *__restrict src) {
+  const char *cur = *src;
+  if (*cur != '0')
+    return 10;
+
+  if ((cur[1] | 32) == 'x') {
+    // cur[1] is not the terminator here, so cur[2] is still inside the string
+    // (it is at worst the terminator itself).
+    const char after = cur[2];
+    const int lowered = after | 32;
+    const bool is_hex_digit =
+        (after >= '0' && after <= '9') || (lowered >= 'a' && lowered <= 'f');
+    if (is_hex_digit) {
+      *src = cur + 2;
+      return 16;
+    }
+  }
+
+  *src = cur + 1;
+  return 8;
+}
+
+// Takes a pointer to a string, a pointer to a string pointer, and the base to
+// convert to. This function is used as the backend for all of the string to int
+// functions.
+//
+//   src:     text to parse: optional leading whitespace, an optional '+' or
+//            '-' sign, an optional base prefix, then digits in the given base.
+//   str_end: if not null, receives the position at which parsing stopped.
+//   base:    0 to infer the base from the prefix, otherwise a value in [2, 36].
+//
+// Returns the parsed value. If base is invalid, errno is set to EINVAL and 0 is
+// returned (str_end is not written in that case). If the value does not fit in
+// a long long, errno is set to ERANGE and LLONG_MAX or LLONG_MIN is returned
+// according to the sign.
+//
+// NOTE(review): two behaviours look different from the C standard's
+// description of strtoll and should be confirmed before callers rely on them:
+//   1. when no digits are consumed, *str_end is set to the position after any
+//      skipped whitespace and sign rather than to the original src;
+//   2. on overflow the loop stops at the digit that overflowed, so *str_end
+//      does not move past the remaining digits.
+static inline long long strtoll(const char *__restrict src,
+                                char **__restrict str_end, int base) {
+  unsigned long long result = 0;
+
+  if (base < 0 || base == 1 || base > 36) {
+    errno = EINVAL; // NOLINT
+    return 0;
+  }
+
+  src = first_non_whitespace(src);
+
+  char result_sign = '+';
+  if (*src == '+' || *src == '-') {
+    result_sign = *src;
+    ++src;
+  }
+
+  if (base == 0) {
+    base = infer_base(&src);
+  } else if (base == 16 && *src == '0' && (*(src + 1) | 32) == 'x' &&
+             isalnum(*(src + 2)) && b36_char_to_int(*(src + 2)) < 16) {
+    // Only skip "0x" when a hexadecimal digit follows it. Otherwise the '0' is
+    // the whole number and parsing has to stop in front of the 'x'.
+    src = src + 2;
+  }
+
+  // Largest magnitude that fits: LLONG_MAX for positive results and
+  // LLONG_MAX + 1 (the magnitude of LLONG_MIN) for negative ones.
+  unsigned long long const ABS_MAX =
+      (result_sign == '+' ? LLONG_MAX
+                          : static_cast<unsigned long long>(LLONG_MAX) + 1);
+  unsigned long long const ABS_MAX_DIV_BY_BASE = ABS_MAX / base;
+  while (isalnum(*src)) {
+    int cur_digit = b36_char_to_int(*src);
+    if (cur_digit >= base)
+      break;
+    // Check before multiplying so that result * base cannot exceed ABS_MAX.
+    if (result > ABS_MAX_DIV_BY_BASE) {
+      result = ABS_MAX;
+      errno = ERANGE; // NOLINT
+      break;
+    }
+    result = result * base;
+    // Same idea for the addition: compare against the remaining headroom.
+    if (result > ABS_MAX - cur_digit) {
+      result = ABS_MAX;
+      errno = ERANGE; // NOLINT
+      break;
+    }
+    result = result + cur_digit;
+
+    ++src;
+  }
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(src);
+  if (result_sign == '+')
+    return result;
+  else
+    return -result;
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STDLIB_STDLIB_UTILS_H

diff  --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index 60a380d1051e9..8457310897f5f 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -50,3 +50,13 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.integer_operations
 )
+
+# strtoll entrypoint. strtoll.cpp forwards to the conversion routine in the
+# str_conv_utils header library, which is why that library is a dependency.
+add_entrypoint_object(
+  strtoll
+  SRCS
+    strtoll.cpp
+  HDRS
+    strtoll.h
+  DEPENDS
+    libc.src.__support.str_conv_utils
+)

diff  --git a/libc/src/stdlib/strtoll.cpp b/libc/src/stdlib/strtoll.cpp
new file mode 100644
index 0000000000000..c99106d7c4232
--- /dev/null
+++ b/libc/src/stdlib/strtoll.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of strtoll -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strtoll.h"
+#include "src/__support/common.h"
+#include "src/__support/str_conv_utils.h"
+
+namespace __llvm_libc {
+
+// Public strtoll entrypoint. All of the parsing is done by internal::strtoll
+// from src/__support/str_conv_utils.h; this wrapper only forwards its three
+// arguments and returns the result unchanged.
+LLVM_LIBC_FUNCTION(long long, strtoll,
+                   (const char *__restrict str, char **__restrict str_end,
+                    int base)) {
+  return internal::strtoll(str, str_end, base);
+}
+
+} // namespace __llvm_libc

diff  --git a/libc/src/stdlib/strtoll.h b/libc/src/stdlib/strtoll.h
new file mode 100644
index 0000000000000..63d7335b88dbe
--- /dev/null
+++ b/libc/src/stdlib/strtoll.h
@@ -0,0 +1,19 @@
+//===-- Implementation header for strtoll -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_STRTOLL_H
+#define LLVM_LIBC_SRC_STDLIB_STRTOLL_H
+
+namespace __llvm_libc {
+
+// Declaration of the strtoll entrypoint implemented in strtoll.cpp.
+//   str:     the text to convert.
+//   str_end: may be null; the unit tests use it to read back the position at
+//            which parsing stopped.
+//   base:    the numeric base to parse in (the tests also pass 0 to have the
+//            base inferred from the text).
+long long strtoll(const char *__restrict str, char **__restrict str_end,
+                  int base);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDLIB_STRTOLL_H

diff  --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index e5cb04a58db1b..d95f95b8d79e2 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -54,3 +54,15 @@ add_libc_unittest(
   DEPENDS
     libc.src.stdlib.llabs
 )
+
+# Unit tests for the strtoll entrypoint (strtoll_test.cpp). The tests read and
+# reset errno, so the errno header is listed as a dependency.
+add_libc_unittest(
+  strtoll_test
+  SUITE
+    libc_stdlib_unittests
+  SRCS
+    strtoll_test.cpp
+  DEPENDS
+    libc.src.stdlib.strtoll
+    libc.include.errno
+    libc.test.errno_setter_matcher
+)

diff  --git a/libc/test/src/stdlib/strtoll_test.cpp b/libc/test/src/stdlib/strtoll_test.cpp
new file mode 100644
index 0000000000000..437d42f5bab80
--- /dev/null
+++ b/libc/test/src/stdlib/strtoll_test.cpp
@@ -0,0 +1,283 @@
+//===-- Unittests for strtoll ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strtoll.h"
+
+#include "utils/UnitTest/Test.h"
+
+#include <errno.h>
+#include <limits.h>
+
+// A negative base is rejected: the call returns 0 and sets errno to EINVAL.
+TEST(LlvmLibcStrToLLTest, InvalidBase) {
+  const char *ten = "10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(ten, nullptr, -1), 0ll);
+  ASSERT_EQ(errno, EINVAL);
+}
+
+// Base-10 parsing of strings that contain nothing but an optional sign and
+// digits, including values too large for a long long. Each case resets errno
+// first, then checks the returned value, errno, and how far str_end advanced.
+TEST(LlvmLibcStrToLLTest, CleanBaseTenDecode) {
+  char *str_end = nullptr;
+
+  const char *ten = "10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(ten, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - ten, 2l);
+  // Same input again, this time passing a null str_end.
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(ten, nullptr, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+
+  const char *hundred = "100";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(hundred, &str_end, 10), 100ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - hundred, 3l);
+
+  // The sign character counts towards the consumed length.
+  const char *negative = "-100";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(negative, &str_end, 10), -100ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - negative, 4l);
+
+  const char *big_number = "123456789012345";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(big_number, &str_end, 10), 123456789012345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - big_number, 15l);
+
+  const char *big_negative_number = "-123456789012345";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(big_negative_number, &str_end, 10),
+            -123456789012345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - big_negative_number, 16l);
+
+  // 21 digits: the result is expected to saturate at LLONG_MAX with ERANGE.
+  // NOTE(review): str_end is expected after 19 of the 21 digits, i.e. where
+  // the overflow was detected, not after the whole digit sequence. Confirm
+  // that this is the intended contract.
+  const char *too_big_number = "123456789012345678901";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(too_big_number, &str_end, 10), LLONG_MAX);
+  ASSERT_EQ(errno, ERANGE);
+  EXPECT_EQ(str_end - too_big_number, 19l);
+
+  // Negative counterpart: saturates at LLONG_MIN; the extra consumed
+  // character compared to the case above is the leading '-'.
+  const char *too_big_negative_number = "-123456789012345678901";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(too_big_negative_number, &str_end, 10),
+            LLONG_MIN);
+  ASSERT_EQ(errno, ERANGE);
+  EXPECT_EQ(str_end - too_big_negative_number, 20l);
+}
+
+// Base-10 parsing of strings that have something other than the number in
+// them: surrounding whitespace, words, extra signs and trailing text. In every
+// case errno is expected to stay 0.
+TEST(LlvmLibcStrToLLTest, MessyBaseTenDecode) {
+  char *str_end = nullptr;
+
+  // Leading spaces are skipped and count towards the consumed length.
+  const char *spaces_before = "     10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(spaces_before, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - spaces_before, 7l);
+
+  // Trailing spaces are not consumed.
+  const char *spaces_after = "10      ";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(spaces_after, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - spaces_after, 2l);
+
+  // Nothing parseable at the start: result 0 and str_end stays at the start.
+  const char *word_before = "word10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(word_before, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - word_before, 0l);
+
+  const char *word_after = "10word";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(word_after, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - word_after, 2l);
+
+  // Only the first number is parsed.
+  const char *two_numbers = "10 999";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(two_numbers, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - two_numbers, 2l);
+
+  // NOTE(review): no digits are parsed here, yet str_end is expected one
+  // character in (after the first '-'). Confirm whether str_end should
+  // instead equal the start of the string when no conversion happens.
+  const char *two_signs = "--10 999";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(two_signs, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - two_signs, 1l);
+
+  const char *sign_before = "+2=4";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(sign_before, &str_end, 10), 2ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - sign_before, 2l);
+
+  // A sign after the digits ends the number.
+  const char *sign_after = "2+2=4";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(sign_after, &str_end, 10), 2ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - sign_after, 1l);
+
+  // A tab is skipped like a space.
+  const char *tab_before = "\t10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(tab_before, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - tab_before, 3l);
+
+  // Whitespace, sign, digits, then trailing text: 3 + 1 + 5 = 9 consumed.
+  const char *all_together = "\t  -12345and+67890";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(all_together, &str_end, 10), -12345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - all_together, 9l);
+}
+
+// Maps a digit value to the character used to spell it in the test strings:
+// 0-9 become '0'-'9' and larger values become uppercase letters counted from
+// 'A' (10 -> 'A'). Negative values and values above 36 fall back to '0'.
+// Note that 36 itself is accepted and yields the character 26 places after
+// 'A' ('[' in ASCII), which is not a letter or a digit.
+static char int_to_b36_char(int input) {
+  const bool in_range = input >= 0 && input <= 36;
+  if (!in_range)
+    return '0';
+  return input < 10 ? '0' + input : 'A' + input - 10;
+}
+
+// Exhaustively parses every one-, two- and three-character string that can be
+// built from digit values 0..36 (spelled with int_to_b36_char) in every base
+// from 2 to 36. The expected result is the value of the leading run of
+// characters that are valid digits in the base, or 0 if the first character is
+// not a valid digit; errno must stay 0 throughout.
+TEST(LlvmLibcStrToLLTest, DecodeInOtherBases) {
+  // Three characters plus a terminator. Unused positions stay '\0' during the
+  // shorter passes because the passes run from shortest to longest.
+  char small_string[4] = {'\0', '\0', '\0', '\0'};
+  // Pass 1: single-character strings.
+  for (int base = 2; base <= 36; ++base) {
+    for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      if (first_digit < base) {
+        errno = 0;
+        ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                  first_digit);
+        ASSERT_EQ(errno, 0);
+      } else {
+        errno = 0;
+        ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+        ASSERT_EQ(errno, 0);
+      }
+    }
+  }
+
+  // Pass 2: two-character strings.
+  for (int base = 2; base <= 36; ++base) {
+    for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      for (long long second_digit = 0; second_digit <= 36; ++second_digit) {
+        small_string[1] = int_to_b36_char(second_digit);
+        if (first_digit < base && second_digit < base) {
+          errno = 0;
+          ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                    second_digit + (first_digit * base));
+          ASSERT_EQ(errno, 0);
+        } else if (first_digit < base) {
+          // Second character is not a digit in this base: only the first
+          // one is parsed.
+          errno = 0;
+          ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                    first_digit);
+          ASSERT_EQ(errno, 0);
+        } else {
+          errno = 0;
+          ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+          ASSERT_EQ(errno, 0);
+        }
+      }
+    }
+  }
+
+  // Pass 3: three-character strings.
+  for (int base = 2; base <= 36; ++base) {
+    for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+      small_string[0] = int_to_b36_char(first_digit);
+      for (long long second_digit = 0; second_digit <= 36; ++second_digit) {
+        small_string[1] = int_to_b36_char(second_digit);
+        for (long long third_digit = 0; third_digit <= 36; ++third_digit) {
+          small_string[2] = int_to_b36_char(third_digit);
+
+          if (first_digit < base && second_digit < base && third_digit < base) {
+            errno = 0;
+            ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                      third_digit + (second_digit * base) +
+                          (first_digit * base * base));
+            ASSERT_EQ(errno, 0);
+          } else if (first_digit < base && second_digit < base) {
+            errno = 0;
+            ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                      second_digit + (first_digit * base));
+            ASSERT_EQ(errno, 0);
+          } else if (first_digit < base) {
+            // if the base is 16 there is a special case for the prefix 0X.
+            // The number is treated as a one digit hexadecimal.
+            // (Digit value 33 is spelled 'X' by int_to_b36_char.)
+            if (base == 16 && first_digit == 0 && second_digit == 33) {
+              if (third_digit < base) {
+                errno = 0;
+                ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                          third_digit);
+                ASSERT_EQ(errno, 0);
+              } else {
+                // "0X" followed by a character that is not a hex digit.
+                errno = 0;
+                ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                          0ll);
+                ASSERT_EQ(errno, 0);
+              }
+            } else {
+              errno = 0;
+              ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+                        first_digit);
+              ASSERT_EQ(errno, 0);
+            }
+          } else {
+            errno = 0;
+            ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+            ASSERT_EQ(errno, 0);
+          }
+        }
+      }
+    }
+  }
+}
+
+// Explicit base 16: the same kind of input must parse both without and with
+// a leading "0x", and the prefix counts towards the consumed length.
+TEST(LlvmLibcStrToLLTest, CleanBaseSixteenDecode) {
+  char *str_end = nullptr;
+
+  const char *no_prefix = "123abc";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(no_prefix, &str_end, 16), 0x123abcll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - no_prefix, 6l);
+
+  const char *yes_prefix = "0x456def";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(yes_prefix, &str_end, 16), 0x456defll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - yes_prefix, 8l);
+}
+
+// Base 0 asks strtoll to pick the base from the text: no prefix means
+// decimal, "0x" means hexadecimal and a leading '0' means octal.
+TEST(LlvmLibcStrToLLTest, AutomaticBaseSelection) {
+  char *str_end = nullptr;
+
+  const char *base_ten = "12345";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(base_ten, &str_end, 0), 12345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - base_ten, 5l);
+
+  // Without a prefix the text is read as decimal, so parsing stops at 'a'.
+  const char *base_sixteen_no_prefix = "123abc";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(base_sixteen_no_prefix, &str_end, 0), 123ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - base_sixteen_no_prefix, 3l);
+
+  const char *base_sixteen_with_prefix = "0x456def";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(base_sixteen_with_prefix, &str_end, 0),
+            0x456defll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - base_sixteen_with_prefix, 8l);
+
+  // The expected value is written as an octal literal to mirror the input.
+  const char *base_eight_with_prefix = "012345";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(base_eight_with_prefix, &str_end, 0),
+            012345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - base_eight_with_prefix, 6l);
+}


        


More information about the libc-commits mailing list