[libc-commits] [libc] [libc] implement a64l (PR #128758)
Michael Jones via libc-commits
libc-commits at lists.llvm.org
Tue Feb 25 10:46:54 PST 2025
https://github.com/michaelrj-google created https://github.com/llvm/llvm-project/pull/128758
Implement the posix function a64l.
Standard: https://pubs.opengroup.org/onlinepubs/9799919799/functions/a64l.html
>From 044981a1b2b74d1254de7eb68c5f7c56c8525df5 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 25 Feb 2025 10:44:07 -0800
Subject: [PATCH] [libc] implement a64l
Implement the posix function a64l.
Standard: https://pubs.opengroup.org/onlinepubs/9799919799/functions/a64l.html
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/include/stdlib.yaml | 6 ++
libc/src/stdlib/CMakeLists.txt | 11 +++
libc/src/stdlib/a64l.cpp | 64 +++++++++++++++++
libc/src/stdlib/a64l.h | 20 ++++++
libc/test/src/stdlib/CMakeLists.txt | 10 +++
libc/test/src/stdlib/a64l_test.cpp | 87 ++++++++++++++++++++++++
7 files changed, 199 insertions(+)
create mode 100644 libc/src/stdlib/a64l.cpp
create mode 100644 libc/src/stdlib/a64l.h
create mode 100644 libc/test/src/stdlib/a64l_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index eaceb15c47291..22f747f24d92a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -177,6 +177,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.stdbit.stdc_trailing_zeros_us
# stdlib.h entrypoints
+ libc.src.stdlib.a64l
libc.src.stdlib.abs
libc.src.stdlib.atof
libc.src.stdlib.atoi
diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml
index 8d2b3f357e1a9..b308df98a6090 100644
--- a/libc/include/stdlib.yaml
+++ b/libc/include/stdlib.yaml
@@ -24,6 +24,12 @@ functions:
return_type: _Noreturn void
arguments:
- type: int
+ - name: a64l
+ standards:
+ - posix
+ return_type: long
+ arguments:
+ - type: const char *
- name: abort
standards:
- stdc
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index 73a9fbf1e2ddc..361f2305358c9 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -184,6 +184,17 @@ add_entrypoint_object(
libc.src.__support.str_to_integer
)
+add_entrypoint_object(
+ a64l
+ SRCS
+ a64l.cpp
+ HDRS
+ a64l.h
+ DEPENDS
+ libc.src.__support.ctype_utils
+ libc.hdr.types.size_t
+)
+
add_entrypoint_object(
abs
SRCS
diff --git a/libc/src/stdlib/a64l.cpp b/libc/src/stdlib/a64l.cpp
new file mode 100644
index 0000000000000..670ac43804bff
--- /dev/null
+++ b/libc/src/stdlib/a64l.cpp
@@ -0,0 +1,64 @@
+//===-- Implementation of a64l --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/a64l.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/macros/config.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Decodes one digit of the base-64 alphabet used by a64l; returns -1 for any
+// other character. Local to this file because the digit order is unusual.
+constexpr int32_t b64_char_to_int(char ch) {
+  // POSIX digit order: '.' is 0, '/' is 1, '0'-'9' are [2,11], 'A'-'Z' are
+  // [12,37], and 'a'-'z' are [38,63].
+  constexpr int32_t PUNCT_DIGITS = 2; // '.' and '/' precede the alphanumerics
+  constexpr int32_t LOWER_SHIFT = 26; // lower case follows the 26 upper case
+  switch (ch) {
+  case '.':
+    return 0;
+  case '/':
+    return 1;
+  default:
+    break;
+  }
+  if (!internal::isalnum(ch))
+    return -1; // not a base-64 digit
+  // b36_char_to_int is case insensitive, so restore the case distinction.
+  const int32_t value = internal::b36_char_to_int(ch) + PUNCT_DIGITS;
+  return internal::islower(ch) ? value + LOWER_SHIFT : value;
+}
+
+// Implements POSIX a64l: decodes up to six base-64 digits from `s`, least
+// significant digit first, into a 32 bit value that is sign extended to long.
+LLVM_LIBC_FUNCTION(long, a64l, (const char *s)) {
+  // the standard says to only use up to 6 characters.
+  constexpr size_t MAX_LENGTH = 6;
+  // Accumulate as unsigned: the sixth digit is shifted by 30 bits, and
+  // shifting a signed value past the sign bit is undefined before C++20.
+  uint32_t result = 0;
+
+  for (size_t i = 0; i < MAX_LENGTH && s[i] != '\0'; ++i) {
+    const int32_t cur_val = b64_char_to_int(s[i]);
+    // The standard says what happens on an unspecified character is undefined,
+    // here we treat it as the end of the string.
+    if (cur_val == -1)
+      break;
+    // Digit i occupies bits [6*i, 6*i+5] since 2^6 = 64; higher bits drop off.
+    result |= static_cast<uint32_t>(cur_val) << (6 * i);
+  }
+
+  // standard says to sign extend from 32 bits.
+  return static_cast<long>(static_cast<int32_t>(result));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/a64l.h b/libc/src/stdlib/a64l.h
new file mode 100644
index 0000000000000..024be058f756c
--- /dev/null
+++ b/libc/src/stdlib/a64l.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for a64l --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_A64L_H
+#define LLVM_LIBC_SRC_STDLIB_A64L_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+// Decodes the base-64 string `s` into a long, as specified by POSIX a64l.
+long a64l(const char *s);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDLIB_A64L_H
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index e6c8a629c71fa..848100442c88b 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -221,6 +221,16 @@ add_libc_test(
${strfrom_test_copts}
)
+add_libc_test(
+ a64l_test
+ SUITE
+ libc-stdlib-tests
+ SRCS
+ a64l_test.cpp
+ DEPENDS
+ libc.src.stdlib.a64l
+)
+
add_libc_test(
abs_test
SUITE
diff --git a/libc/test/src/stdlib/a64l_test.cpp b/libc/test/src/stdlib/a64l_test.cpp
new file mode 100644
index 0000000000000..acdef5d69543d
--- /dev/null
+++ b/libc/test/src/stdlib/a64l_test.cpp
@@ -0,0 +1,87 @@
+//===-- Unittests for a64l ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/a64l.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcA64lTest, EmptyString) { ASSERT_EQ(LIBC_NAMESPACE::a64l(""), 0L); }
+TEST(LlvmLibcA64lTest, FullString) {
+  ASSERT_EQ(LIBC_NAMESPACE::a64l("AbC12/"), 1141696972L);
+}
+
+constexpr char B64_CHARS[64] = { // a64l digits; the index is the digit value
+ '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', // 0-12
+ 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', // 13-25
+ 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', // 26-38
+ 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 39-51
+ 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', // 52-63
+};
+
+TEST(LlvmLibcA64lTest, OneCharacter) {
+  // Each of the 64 digits, as a one character string, must decode to its
+  // index in B64_CHARS.
+  for (size_t digit = 0; digit < 64; ++digit) {
+    const char input[2] = {B64_CHARS[digit], '\0'};
+    ASSERT_EQ(LIBC_NAMESPACE::a64l(input), static_cast<long>(digit));
+  }
+}
+
+TEST(LlvmLibcA64lTest, TwoCharacters) {
+  // The first character is the low digit, so the pair (lo, hi) must decode to
+  // lo + 64 * hi for every combination of digits.
+  char input[3] = {'\0', '\0', '\0'};
+  for (size_t lo = 0; lo < 64; ++lo) {
+    input[0] = B64_CHARS[lo];
+    for (size_t hi = 0; hi < 64; ++hi) {
+      input[1] = B64_CHARS[hi];
+      const long expected = static_cast<long>(lo + (hi * 64));
+      ASSERT_EQ(LIBC_NAMESPACE::a64l(input), expected);
+    }
+  }
+}
+
+TEST(LlvmLibcA64lTest, FiveSameCharacters) {
+  // Five digits cover bits 0-29. A sixth digit would only contribute its low
+  // two bits to the 32 bit result, so that case is not exercised here.
+  constexpr size_t NUM_DIGITS = 5;
+  char input[NUM_DIGITS + 1] = {};
+
+  // Bits 0, 6, 12, 18 and 24 set: multiplying by a digit value repeats that
+  // digit in each of the five 6 bit positions.
+  const long REPEAT_MASK = 0b1000001000001000001000001;
+
+  for (size_t digit = 0; digit < 64; ++digit) {
+    // Fill all five positions with the same digit.
+    for (size_t pos = 0; pos < NUM_DIGITS; ++pos)
+      input[pos] = B64_CHARS[digit];
+
+    const long expected = REPEAT_MASK * static_cast<long>(digit);
+    ASSERT_EQ(LIBC_NAMESPACE::a64l(input), expected);
+  }
+}
+
+TEST(LlvmLibcA64lTest, OneOfSixCharacters) {
+  constexpr size_t NUM_DIGITS = 6;
+  char input[NUM_DIGITS + 1] = {};
+
+  for (size_t pos = 0; pos < NUM_DIGITS; ++pos) {
+    // Reset every position to the zero digit so only `pos` contributes.
+    for (size_t i = 0; i < NUM_DIGITS; ++i)
+      input[i] = B64_CHARS[0];
+
+    for (size_t digit = 0; digit < 64; ++digit) {
+      input[pos] = B64_CHARS[digit];
+      // a64l produces a 32 bit value, so truncate the shifted digit to 32
+      // bits before widening to long; this matters for the sixth position.
+      const size_t shifted = digit << (6 * pos);
+      const long expected = static_cast<int32_t>(shifted);
+
+      ASSERT_EQ(LIBC_NAMESPACE::a64l(input), expected);
+    }
+  }
+}
More information about the libc-commits mailing list