[libc-commits] [libc] b062d63 - [libc] add strtoll function and backend
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Wed Aug 11 16:37:29 PDT 2021
Author: Michael Jones
Date: 2021-08-11T23:37:23Z
New Revision: b062d639bb3a315f2b5f410e99b9c3ea10c530cc
URL: https://github.com/llvm/llvm-project/commit/b062d639bb3a315f2b5f410e99b9c3ea10c530cc
DIFF: https://github.com/llvm/llvm-project/commit/b062d639bb3a315f2b5f410e99b9c3ea10c530cc.diff
LOG: [libc] add strtoll function and backend
This change adds the strtoll function, but most of the implementation is
in the new file str_conv_utils.h since many of the other integer
conversion functions are implemented through what are effectively calls
to strtoll.
Reviewed By: sivachandra
Differential Revision: https://reviews.llvm.org/D107792
Added:
libc/src/__support/str_conv_utils.h
libc/src/stdlib/strtoll.cpp
libc/src/stdlib/strtoll.h
libc/test/src/stdlib/strtoll_test.cpp
Modified:
libc/config/linux/x86_64/entrypoints.txt
libc/spec/spec.td
libc/spec/stdc.td
libc/src/__support/CMakeLists.txt
libc/src/stdlib/CMakeLists.txt
libc/test/src/stdlib/CMakeLists.txt
Removed:
################################################################################
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 6e9d991d25320..90346b4b3461f 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -158,6 +158,7 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.stdlib.abs
libc.src.stdlib.labs
libc.src.stdlib.llabs
+ libc.src.stdlib.strtoll
# signal.h entrypoints
libc.src.signal.raise
diff --git a/libc/spec/spec.td b/libc/spec/spec.td
index c78c0f053e06e..e49c759c1f790 100644
--- a/libc/spec/spec.td
+++ b/libc/spec/spec.td
@@ -62,6 +62,7 @@ def ConstVoidRestrictedPtr : ConstType<VoidRestrictedPtr>;
def CharPtr : PtrType<CharType>;
def ConstCharPtr : ConstType<CharPtr>;
def CharRestrictedPtr : RestrictedPtrType<CharType>;
+def CharRestrictedPtrPtr : RestrictedPtrType<CharPtr>;
def ConstCharRestrictedPtr : ConstType<CharRestrictedPtr>;
def OnceFlagType : NamedType<"once_flag">;
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index c6621812258cc..eb0ffdfe2e9d2 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -478,6 +478,7 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"abs", RetValSpec<IntType>, [ArgSpec<IntType>]>,
FunctionSpec<"labs", RetValSpec<LongType>, [ArgSpec<LongType>]>,
FunctionSpec<"llabs", RetValSpec<LongLongType>, [ArgSpec<LongLongType>]>,
+ FunctionSpec<"strtoll", RetValSpec<LongLongType>, [ArgSpec<ConstCharRestrictedPtr>, ArgSpec<CharRestrictedPtrPtr>, ArgSpec<IntType>]>,
FunctionSpec<"_Exit", RetValSpec<NoReturn>, [ArgSpec<IntType>]>,
]
>;
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 5135632c377ac..dce6cb871ec56 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -12,6 +12,16 @@ add_header_library(
ctype_utils.h
)
+add_header_library(
+ str_conv_utils
+ HDRS
+ str_conv_utils.h
+ DEPENDS
+ .ctype_utils
+ libc.include.errno
+ libc.src.errno.__errno_location
+)
+
add_header_library(
integer_operations
HDRS
diff --git a/libc/src/__support/str_conv_utils.h b/libc/src/__support/str_conv_utils.h
new file mode 100644
index 0000000000000..6a83fa6799a77
--- /dev/null
+++ b/libc/src/__support/str_conv_utils.h
@@ -0,0 +1,111 @@
+//===-- Stdlib utils --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC_STDLIB_STDLIB_UTILS_H
+#define LIBC_SRC_STDLIB_STDLIB_UTILS_H
+
+#include "src/__support/ctype_utils.h"
+#include <errno.h>
+#include <limits.h>
+
+namespace __llvm_libc {
+namespace internal {
+
+// Advances past every leading character that internal::isspace() accepts and
+// returns a pointer to the first character it rejects.
+static inline const char *first_non_whitespace(const char *__restrict src) {
+  const char *cursor = src;
+  for (; internal::isspace(*cursor); ++cursor) {
+  }
+  return cursor;
+}
+
+// Value of `input` as a base-36 digit: '0'-'9' give 0-9, letters are folded to
+// lower case and give 10-35. Anything else gives 0, so check isalnum() first.
+static inline int b36_char_to_int(char input) {
+  if (isalpha(input))
+    return (input | 32) - 'a' + 10;
+  return isdigit(input) ? input - '0' : 0;
+}
+
+// Infers the numeric base from the prefix at *src and advances *src past the
+// consumed part of it: "0x"/"0X" followed by a hex digit means base 16, any
+// other leading '0' means base 8 (the '0' is consumed), anything else base 10.
+static inline int infer_base(const char *__restrict *__restrict src) {
+  if (**src != '0')
+    return 10;
+  const char *after_zero = *src + 1;
+  // A "0x" with no hex digit behind it is not a prefix: only the "0" belongs
+  // to the number, so the 'x' is left in place for the caller to stop on.
+  if ((*after_zero | 32) == 'x' && isalnum(after_zero[1]) &&
+      b36_char_to_int(after_zero[1]) < 16) {
+    *src = after_zero + 1;
+    return 16;
+  }
+  *src = after_zero;
+  return 8;
+}
+
+// Backend for the string-to-integer functions. Skips leading whitespace, takes
+// one optional sign, then consumes digits valid in `base` (0 = infer from the
+// prefix). An invalid base returns 0 with errno = EINVAL and leaves *str_end
+// untouched. Out-of-range input clamps to LLONG_MAX / LLONG_MIN, sets errno =
+// ERANGE and stops at the digit that would overflow. If str_end is non-null it
+// receives the position where parsing stopped.
+static inline long long strtoll(const char *__restrict src,
+                                char **__restrict str_end, int base) {
+  if (base < 0 || base == 1 || base > 36) {
+    errno = EINVAL; // NOLINT
+    return 0;
+  }
+
+  src = first_non_whitespace(src);
+
+  const bool is_negative = (*src == '-');
+  if (*src == '+' || *src == '-')
+    ++src;
+
+  if (base == 0) {
+    base = infer_base(&src);
+  } else if (base == 16 && *src == '0' && (src[1] | 32) == 'x' &&
+             isalnum(src[2]) && b36_char_to_int(src[2]) < 16) {
+    // Skip "0x" only when a hex digit follows; otherwise the number is just
+    // the "0" and parsing has to stop on the 'x'.
+    src += 2;
+  }
+
+  // Largest magnitude that fits: LLONG_MAX, or LLONG_MAX + 1 when negative.
+  unsigned long long const ABS_MAX =
+      static_cast<unsigned long long>(LLONG_MAX) + (is_negative ? 1 : 0);
+  unsigned long long const ABS_MAX_DIV_BY_BASE = ABS_MAX / base;
+  unsigned long long result = 0;
+  while (isalnum(*src)) {
+    int cur_digit = b36_char_to_int(*src);
+    if (cur_digit >= base)
+      break;
+    // Clamp if either the multiply or the add would exceed ABS_MAX. The
+    // first test guarantees result * base cannot wrap in the second.
+    if (result > ABS_MAX_DIV_BY_BASE ||
+        result * base > ABS_MAX - cur_digit) {
+      result = ABS_MAX;
+      errno = ERANGE; // NOLINT
+      break;
+    }
+    result = result * base + cur_digit;
+    ++src;
+  }
+
+  if (str_end != nullptr)
+    *str_end = const_cast<char *>(src);
+  return is_negative ? -result : result;
+}
+
+} // namespace internal
+} // namespace __llvm_libc
+
+#endif // LIBC_SRC_STDLIB_STDLIB_UTILS_H
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index 60a380d1051e9..8457310897f5f 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -50,3 +50,13 @@ add_entrypoint_object(
DEPENDS
libc.src.__support.integer_operations
)
+
+add_entrypoint_object(
+ strtoll
+ SRCS
+ strtoll.cpp
+ HDRS
+ strtoll.h
+ DEPENDS
+ libc.src.__support.str_conv_utils
+)
diff --git a/libc/src/stdlib/strtoll.cpp b/libc/src/stdlib/strtoll.cpp
new file mode 100644
index 0000000000000..c99106d7c4232
--- /dev/null
+++ b/libc/src/stdlib/strtoll.cpp
@@ -0,0 +1,21 @@
+//===-- Implementation of strtoll -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strtoll.h"
+#include "src/__support/common.h"
+#include "src/__support/str_conv_utils.h"
+
+namespace __llvm_libc {
+// Public strtoll entrypoint: hands its arguments to internal::strtoll as-is.
+LLVM_LIBC_FUNCTION(long long, strtoll,
+ (const char *__restrict str, char **__restrict str_end,
+ int base)) {
+ return internal::strtoll(str, str_end, base);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/stdlib/strtoll.h b/libc/src/stdlib/strtoll.h
new file mode 100644
index 0000000000000..63d7335b88dbe
--- /dev/null
+++ b/libc/src/stdlib/strtoll.h
@@ -0,0 +1,19 @@
+//===-- Implementation header for strtoll -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_STRTOLL_H
+#define LLVM_LIBC_SRC_STDLIB_STRTOLL_H
+
+namespace __llvm_libc {
+// Declaration of the strtoll entrypoint: string, optional end pointer, base.
+long long strtoll(const char *__restrict str, char **__restrict str_end,
+ int base);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_STDLIB_STRTOLL_H
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index e5cb04a58db1b..d95f95b8d79e2 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -54,3 +54,15 @@ add_libc_unittest(
DEPENDS
libc.src.stdlib.llabs
)
+
+add_libc_unittest(
+ strtoll_test
+ SUITE
+ libc_stdlib_unittests
+ SRCS
+ strtoll_test.cpp
+ DEPENDS
+ libc.src.stdlib.strtoll
+ libc.include.errno
+ libc.test.errno_setter_matcher
+)
diff --git a/libc/test/src/stdlib/strtoll_test.cpp b/libc/test/src/stdlib/strtoll_test.cpp
new file mode 100644
index 0000000000000..437d42f5bab80
--- /dev/null
+++ b/libc/test/src/stdlib/strtoll_test.cpp
@@ -0,0 +1,283 @@
+//===-- Unittests for strtoll ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/strtoll.h"
+
+#include "utils/UnitTest/Test.h"
+
+#include <errno.h>
+#include <limits.h>
+
+TEST(LlvmLibcStrToLLTest, InvalidBase) { // a negative base must be rejected
+ const char *ten = "10";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(ten, nullptr, -1), 0ll); // result is 0 ...
+ ASSERT_EQ(errno, EINVAL); // ... and errno reports the invalid base
+}
+
+TEST(LlvmLibcStrToLLTest, CleanBaseTenDecode) {
+ char *str_end = nullptr;
+ // Two digits, parsed once with and once without an end pointer.
+ const char *ten = "10";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(ten, &str_end, 10), 10ll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - ten, 2l);
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(ten, nullptr, 10), 10ll);
+ ASSERT_EQ(errno, 0);
+ // Three digits.
+ const char *hundred = "100";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(hundred, &str_end, 10), 100ll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - hundred, 3l);
+ // A leading '-' negates the value and counts toward the end offset.
+ const char *negative = "-100";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(negative, &str_end, 10), -100ll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - negative, 4l);
+ // Fifteen digits: no range error is expected.
+ const char *big_number = "123456789012345";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(big_number, &str_end, 10), 123456789012345ll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - big_number, 15l);
+ // The same fifteen digits with a leading '-'.
+ const char *big_negative_number = "-123456789012345";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(big_negative_number, &str_end, 10),
+ -123456789012345ll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - big_negative_number, 16l);
+ // 21 digits overflow: expect LLONG_MAX, ERANGE, and an end offset of 19.
+ const char *too_big_number = "123456789012345678901";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(too_big_number, &str_end, 10), LLONG_MAX);
+ ASSERT_EQ(errno, ERANGE);
+ EXPECT_EQ(str_end - too_big_number, 19l);
+ // Negative overflow: expect LLONG_MIN, ERANGE, and an end offset of 20.
+ const char *too_big_negative_number = "-123456789012345678901";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(too_big_negative_number, &str_end, 10),
+ LLONG_MIN);
+ ASSERT_EQ(errno, ERANGE);
+ EXPECT_EQ(str_end - too_big_negative_number, 20l);
+}
+
+TEST(LlvmLibcStrToLLTest, MessyBaseTenDecode) {
+  char *str_end = nullptr;
+  // Five leading spaces are skipped and still count toward the end offset.
+  const char *spaces_before = "     10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(spaces_before, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - spaces_before, 7l);
+  // Trailing whitespace is not consumed.
+  const char *spaces_after = "10 ";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(spaces_after, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - spaces_after, 2l);
+  // Text before the digits: nothing is converted and the end offset is 0.
+  const char *word_before = "word10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(word_before, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - word_before, 0l);
+  // Parsing stops at the first character that is not a decimal digit.
+  const char *word_after = "10word";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(word_after, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - word_after, 2l);
+  // Only the first of two whitespace-separated numbers is read.
+  const char *two_numbers = "10 999";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(two_numbers, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - two_numbers, 2l);
+  // Only one sign is accepted; the second '-' ends parsing with no digits.
+  const char *two_signs = "--10 999";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(two_signs, &str_end, 10), 0ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - two_signs, 1l);
+  // A leading '+' is consumed as the sign.
+  const char *sign_before = "+2=4";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(sign_before, &str_end, 10), 2ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - sign_before, 2l);
+  // A sign after the digits is not part of the number.
+  const char *sign_after = "2+2=4";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(sign_after, &str_end, 10), 2ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - sign_after, 1l);
+  // A tab counts as leading whitespace.
+  const char *tab_before = "\t10";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(tab_before, &str_end, 10), 10ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - tab_before, 3l);
+  // Tab, two spaces, sign, digits, then trailing text (end offset 1 + 2 + 6).
+  const char *all_together = "\t  -12345and+67890";
+  errno = 0;
+  ASSERT_EQ(__llvm_libc::strtoll(all_together, &str_end, 10), -12345ll);
+  ASSERT_EQ(errno, 0);
+  EXPECT_EQ(str_end - all_together, 9l);
+}
+
+// Test helper: digit value -> character, using '0'-'9' then 'A'-'Z'. Values
+// outside 0..36 give '0'; 36 itself gives '[' ('A' + 26), an invalid digit.
+static char int_to_b36_char(int input) {
+  if (input < 0 || input > 36)
+    return '0';
+  return input < 10 ? '0' + input : 'A' + input - 10;
+}
+
+TEST(LlvmLibcStrToLLTest, DecodeInOtherBases) {
+ char small_string[4] = {'\0', '\0', '\0', '\0'};
+ for (int base = 2; base <= 36; ++base) { // one-character strings
+ for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+ small_string[0] = int_to_b36_char(first_digit);
+ if (first_digit < base) {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+ first_digit);
+ ASSERT_EQ(errno, 0);
+ } else {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+ ASSERT_EQ(errno, 0);
+ }
+ }
+ }
+ // Two-character strings: only digits before the first invalid one count.
+ for (int base = 2; base <= 36; ++base) {
+ for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+ small_string[0] = int_to_b36_char(first_digit);
+ for (long long second_digit = 0; second_digit <= 36; ++second_digit) {
+ small_string[1] = int_to_b36_char(second_digit);
+ if (first_digit < base && second_digit < base) {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+ second_digit + (first_digit * base));
+ ASSERT_EQ(errno, 0);
+ } else if (first_digit < base) {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+ first_digit);
+ ASSERT_EQ(errno, 0);
+ } else {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+ ASSERT_EQ(errno, 0);
+ }
+ }
+ }
+ }
+ // Three-character strings, including the "0X" prefix case in base 16.
+ for (int base = 2; base <= 36; ++base) {
+ for (long long first_digit = 0; first_digit <= 36; ++first_digit) {
+ small_string[0] = int_to_b36_char(first_digit);
+ for (long long second_digit = 0; second_digit <= 36; ++second_digit) {
+ small_string[1] = int_to_b36_char(second_digit);
+ for (long long third_digit = 0; third_digit <= 36; ++third_digit) {
+ small_string[2] = int_to_b36_char(third_digit);
+ // The expected value covers only the leading run of valid digits.
+ if (first_digit < base && second_digit < base && third_digit < base) {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+ third_digit + (second_digit * base) +
+ (first_digit * base * base));
+ ASSERT_EQ(errno, 0);
+ } else if (first_digit < base && second_digit < base) {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+ second_digit + (first_digit * base));
+ ASSERT_EQ(errno, 0);
+ } else if (first_digit < base) {
+ // if the base is 16 there is a special case for the prefix 0X.
+ // The number is treated as a one digit hexadecimal.
+ if (base == 16 && first_digit == 0 && second_digit == 33) { // 33 -> 'X'
+ if (third_digit < base) {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+ third_digit);
+ ASSERT_EQ(errno, 0);
+ } else {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+ 0ll);
+ ASSERT_EQ(errno, 0);
+ }
+ } else {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base),
+ first_digit);
+ ASSERT_EQ(errno, 0);
+ }
+ } else {
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(small_string, nullptr, base), 0ll);
+ ASSERT_EQ(errno, 0);
+ }
+ }
+ }
+ }
+ }
+}
+
+TEST(LlvmLibcStrToLLTest, CleanBaseSixteenDecode) {
+ char *str_end = nullptr;
+ // Hex digits without a prefix, with the base given explicitly.
+ const char *no_prefix = "123abc";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(no_prefix, &str_end, 16), 0x123abcll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - no_prefix, 6l);
+ // An explicit base 16 also accepts a "0x" prefix; it counts in the offset.
+ const char *yes_prefix = "0x456def";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(yes_prefix, &str_end, 16), 0x456defll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - yes_prefix, 8l);
+}
+
+TEST(LlvmLibcStrToLLTest, AutomaticBaseSelection) {
+ char *str_end = nullptr;
+ // Base 0 with no prefix is read as decimal.
+ const char *base_ten = "12345";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(base_ten, &str_end, 0), 12345ll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - base_ten, 5l);
+ // Without a prefix the letters are not digits: decimal parsing stops at 'a'.
+ const char *base_sixteen_no_prefix = "123abc";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(base_sixteen_no_prefix, &str_end, 0), 123ll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - base_sixteen_no_prefix, 3l);
+ // A "0x" prefix selects base 16.
+ const char *base_sixteen_with_prefix = "0x456def";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(base_sixteen_with_prefix, &str_end, 0),
+ 0x456defll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - base_sixteen_with_prefix, 8l);
+ // A leading '0' alone selects base 8.
+ const char *base_eight_with_prefix = "012345";
+ errno = 0;
+ ASSERT_EQ(__llvm_libc::strtoll(base_eight_with_prefix, &str_end, 0),
+ 012345ll);
+ ASSERT_EQ(errno, 0);
+ EXPECT_EQ(str_end - base_eight_with_prefix, 6l);
+}
More information about the libc-commits
mailing list