[libc-commits] [libc] [libc] implement l64a (PR #129099)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Thu Feb 27 11:19:39 PST 2025
https://github.com/michaelrj-google updated https://github.com/llvm/llvm-project/pull/129099
>From 723e8380a80f820cd5506cee0ea2d60b5a31f55a Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Thu, 27 Feb 2025 10:39:27 -0800
Subject: [PATCH 1/2] [libc] implement l64a
Adds l64a, which generates the base 64 string expected by a64l.
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/docs/dev/undefined_behavior.rst | 13 ++++
libc/src/stdlib/CMakeLists.txt | 11 +++
libc/src/stdlib/l64a.cpp | 64 ++++++++++++++++
libc/src/stdlib/l64a.h | 20 +++++
libc/test/src/stdlib/CMakeLists.txt | 11 +++
libc/test/src/stdlib/l64a_test.cpp | 94 ++++++++++++++++++++++++
7 files changed, 214 insertions(+)
create mode 100644 libc/src/stdlib/l64a.cpp
create mode 100644 libc/src/stdlib/l64a.h
create mode 100644 libc/test/src/stdlib/l64a_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index a07393a49e0ad..c9e9fe4eacdfc 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -185,6 +185,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdlib.atoll
libc.src.stdlib.bsearch
libc.src.stdlib.div
+ libc.src.stdlib.l64a
libc.src.stdlib.labs
libc.src.stdlib.ldiv
libc.src.stdlib.llabs
diff --git a/libc/docs/dev/undefined_behavior.rst b/libc/docs/dev/undefined_behavior.rst
index b252832e6715c..aeeaf17c09aa5 100644
--- a/libc/docs/dev/undefined_behavior.rst
+++ b/libc/docs/dev/undefined_behavior.rst
@@ -81,6 +81,7 @@ The C standard does not specify behavior for ``printf("%s", NULL)``. We will
print the string literal ``(null)`` unless using the
``LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS`` option described in :ref:`printf
behavior<printf_behavior>`.
+TODO: Move this to printf_behavior.
Unknown Math Rounding Direction
-------------------------------
@@ -143,3 +144,15 @@ More specific flags take precedence over less specific flags (i.e. '+' takes pre
Any conversion with a minimum width is padded with the padding character until it is at least as long as the minimum width.
Modifiers are applied, then the result is padded if necessary.
Any composite conversion will pass along all flags to the component conversions.
+
+a64l and l64a
+-------------
+These functions convert to and from a POSIX-specified base 64 encoding. There
+are a few cases left undefined. For a64l, the behavior is undefined if the
+input pointer (s) is a null pointer. For LLVM-libc this will cause a null
+pointer dereference. It's also undefined if the input string to a64l wasn't
+generated by l64a. For LLVM-libc, if the user passes a valid base 64 string, it
+will be parsed as normal. For l64a, it's unspecified what happens if the input
+value is negative. For LLVM-libc, all inputs to l64a are treated as unsigned
+32-bit integers. Additionally, the return value of l64a points to a
+thread-local buffer that's overwritten on each call.
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index 361f2305358c9..63eb358717656 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -195,6 +195,17 @@ add_entrypoint_object(
libc.hdr.types.size_t
)
+add_entrypoint_object(
+ l64a
+ SRCS
+ l64a.cpp
+ HDRS
+ l64a.h
+ DEPENDS
+ libc.src.__support.ctype_utils
+ libc.hdr.types.size_t
+)
+
add_entrypoint_object(
abs
SRCS
diff --git a/libc/src/stdlib/l64a.cpp b/libc/src/stdlib/l64a.cpp
new file mode 100644
index 0000000000000..a3c1460a6f535
--- /dev/null
+++ b/libc/src/stdlib/l64a.cpp
@@ -0,0 +1,64 @@
+//===-- Implementation of l64a --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/l64a.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/libc_assert.h"
+#include "src/__support/macros/config.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+// The standard says to use at most 6 characters. The null terminator is
+// unnecessary, but we add it for ease of use. Growing the buffer from 48 to 56
+// bits probably won't matter, since it's likely 32-bit aligned anyway.
+constexpr size_t MAX_BASE64_LENGTH = 6;
+LIBC_THREAD_LOCAL char BASE64_BUFFER[MAX_BASE64_LENGTH + 1];
+
+constexpr static char b64_int_to_char(uint32_t num) {
+ // from the standard: "The characters used to represent digits are '.' (dot)
+ // for 0, '/' for 1, '0' through '9' for [2,11], 'A' through 'Z' for [12,37],
+ // and 'a' through 'z' for [38,63]."
+ LIBC_ASSERT(num < 64);
+ if (num == 0)
+ return '.';
+ if (num == 1)
+ return '/';
+ if (num < 38)
+ return static_cast<char>(
+ internal::toupper(internal::int_to_b36_char(num - 2)));
+
+ // this tolower is technically unnecessary, but it provides safety if we
+ // change the default behavior of int_to_b36_char. Also the compiler
+ // completely elides it so there's no performance penalty, see:
+ // https://godbolt.org/z/sfxdhYvYb TODO: UPDATE THIS LINK
+ return static_cast<char>(
+ internal::tolower(internal::int_to_b36_char(num - 2 - 26)));
+}
+
+// This function takes a long and converts the low 32 bits of it into exactly 6
+// characters. It returns a pointer to a thread-local buffer.
+LLVM_LIBC_FUNCTION(char *, l64a, (long value)) {
+ // static cast to uint32_t to get just the low 32 bits in a consistent way.
+ // The standard says negative values are undefined, so I'm just defining them
+ // to be treated as unsigned.
+ uint32_t cur_value = static_cast<uint32_t>(value);
+ for (size_t i = 0; i < MAX_BASE64_LENGTH; ++i) {
+ uint32_t cur_char = cur_value % 64;
+ BASE64_BUFFER[i] = b64_int_to_char(cur_char);
+ cur_value /= 64;
+ }
+
+ BASE64_BUFFER[MAX_BASE64_LENGTH] = '\0'; // force null termination.
+ return BASE64_BUFFER;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/l64a.h b/libc/src/stdlib/l64a.h
new file mode 100644
index 0000000000000..be01f04f2c3b5
--- /dev/null
+++ b/libc/src/stdlib/l64a.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for l64a --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_l64a_H
+#define LLVM_LIBC_SRC_STDLIB_l64a_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+char *l64a(long value);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDLIB_l64a_H
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index 848100442c88b..90d7b7a55dbe6 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -231,6 +231,17 @@ add_libc_test(
libc.src.stdlib.a64l
)
+add_libc_test(
+ l64a_test
+ SUITE
+ libc-stdlib-tests
+ SRCS
+ l64a_test.cpp
+ DEPENDS
+ libc.src.stdlib.l64a
+ libc.src.__support.CPP.limits
+)
+
add_libc_test(
abs_test
SUITE
diff --git a/libc/test/src/stdlib/l64a_test.cpp b/libc/test/src/stdlib/l64a_test.cpp
new file mode 100644
index 0000000000000..a58ded1663b9e
--- /dev/null
+++ b/libc/test/src/stdlib/l64a_test.cpp
@@ -0,0 +1,94 @@
+//===-- Unittests for l64a ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/CPP/limits.h"
+#include "src/stdlib/l64a.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcL64aTest, Zero) {
+ ASSERT_STREQ(LIBC_NAMESPACE::l64a(0), "......");
+}
+TEST(LlvmLibcL64aTest, Max) {
+ ASSERT_STREQ(LIBC_NAMESPACE::l64a(
+ LIBC_NAMESPACE::cpp::numeric_limits<uint32_t>::max()),
+ "zzzzz1");
+}
+
+constexpr char B64_CHARS[64] = {
+ '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
+ 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
+ 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a',
+ 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+ 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+TEST(LlvmLibcL64aTest, OneCharacter) {
+ // The trailing null is technically unnecessary, but it means it won't look
+ // bad when we print it.
+ char expected_str[7] = {'\0', '.', '.', '.', '.', '.', '\0'};
+
+ for (size_t i = 0; i < 64; ++i) {
+ expected_str[0] = B64_CHARS[i];
+ ASSERT_STREQ(LIBC_NAMESPACE::l64a(i), expected_str);
+ }
+}
+
+TEST(LlvmLibcL64aTest, TwoCharacters) {
+ char expected_str[7] = {'\0', '\0', '.', '.', '.', '.', '\0'};
+
+ for (size_t first = 0; first < 64; ++first) {
+ expected_str[0] = B64_CHARS[first];
+ for (size_t second = 0; second < 64; ++second) {
+ expected_str[1] = B64_CHARS[second];
+
+ ASSERT_STREQ(LIBC_NAMESPACE::l64a(first + (second * 64)), expected_str);
+ }
+ }
+}
+
+TEST(LlvmLibcL64aTest, FiveSameCharacters) {
+ // Only using 5 because those are the only digits that can be any character.
+ char expected_str[7] = {'\0', '\0', '\0', '\0', '\0', '.', '\0'};
+
+ // set every 6th bit
+ const long BASE_NUM = 0b1000001000001000001000001;
+
+ for (size_t char_val = 0; char_val < 64; ++char_val) {
+ for (size_t i = 0; i < 5; ++i)
+ expected_str[i] = B64_CHARS[char_val];
+
+ const long input_num = BASE_NUM * char_val;
+
+ ASSERT_STREQ(LIBC_NAMESPACE::l64a(input_num), expected_str);
+ }
+}
+
+TEST(LlvmLibcL64aTest, OneOfSixCharacters) {
+ char expected_str[7] = {'\0', '\0', '\0', '\0', '\0', '\0', '\0'};
+
+ for (size_t cur_char = 0; cur_char < 6; ++cur_char) {
+ // clear the string, set all the chars to b64(0)
+ for (size_t i = 0; i < 6; ++i)
+ expected_str[i] = B64_CHARS[0];
+
+ for (size_t char_val = 0; char_val < 64; ++char_val) {
+ // Since each base64 character holds 6 bits and we're only using 32 bits
+ // of input, the 6th character only gets 2 bits, so it can never be
+ // greater than 3.
+ if (char_val > 3 && cur_char == 5)
+ break;
+ expected_str[cur_char] = B64_CHARS[char_val];
+
+ // Need to limit to 32 bits, since that's what the standard says the
+ // function does.
+ const long input_num = static_cast<int32_t>(char_val << (6 * cur_char));
+
+ ASSERT_STREQ(LIBC_NAMESPACE::l64a(input_num), expected_str);
+ }
+ }
+}
>From 641b4379b12c60b772f7d811ac95e623417ec12f Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Thu, 27 Feb 2025 11:19:10 -0800
Subject: [PATCH 2/2] update godbolt link and add TODO to a64l
---
libc/src/stdlib/a64l.cpp | 1 +
libc/src/stdlib/l64a.cpp | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/libc/src/stdlib/a64l.cpp b/libc/src/stdlib/a64l.cpp
index 5c1b819732abf..84be2d208f7d7 100644
--- a/libc/src/stdlib/a64l.cpp
+++ b/libc/src/stdlib/a64l.cpp
@@ -40,6 +40,7 @@ constexpr static int32_t b64_char_to_int(char ch) {
// This function takes a base 64 string and writes it to the low 32 bits of a
// long.
+// TODO: use LIBC_ADD_NULL_CHECKS for checking if the input is a null pointer.
LLVM_LIBC_FUNCTION(long, a64l, (const char *s)) {
// the standard says to only use up to 6 characters.
constexpr size_t MAX_LENGTH = 6;
diff --git a/libc/src/stdlib/l64a.cpp b/libc/src/stdlib/l64a.cpp
index a3c1460a6f535..b5506c3e40c35 100644
--- a/libc/src/stdlib/l64a.cpp
+++ b/libc/src/stdlib/l64a.cpp
@@ -39,7 +39,7 @@ constexpr static char b64_int_to_char(uint32_t num) {
// this tolower is technically unnecessary, but it provides safety if we
// change the default behavior of int_to_b36_char. Also the compiler
// completely elides it so there's no performance penalty, see:
- // https://godbolt.org/z/sfxdhYvYb TODO: UPDATE THIS LINK
+ // https://godbolt.org/z/o5ennv7fc
return static_cast<char>(
internal::tolower(internal::int_to_b36_char(num - 2 - 26)));
}
More information about the libc-commits
mailing list