[libc-commits] [libc] [libc] implement a64l (PR #128758)

Michael Jones via libc-commits libc-commits at lists.llvm.org
Tue Feb 25 10:46:54 PST 2025


https://github.com/michaelrj-google created https://github.com/llvm/llvm-project/pull/128758

Implement the POSIX function a64l.
Standard: https://pubs.opengroup.org/onlinepubs/9799919799/functions/a64l.html


>From 044981a1b2b74d1254de7eb68c5f7c56c8525df5 Mon Sep 17 00:00:00 2001
From: Michael Jones <michaelrj at google.com>
Date: Tue, 25 Feb 2025 10:44:07 -0800
Subject: [PATCH] [libc] implement a64l

Implement the POSIX function a64l.
Standard: https://pubs.opengroup.org/onlinepubs/9799919799/functions/a64l.html
---
 libc/config/linux/x86_64/entrypoints.txt |  1 +
 libc/include/stdlib.yaml                 |  6 ++
 libc/src/stdlib/CMakeLists.txt           | 11 +++
 libc/src/stdlib/a64l.cpp                 | 64 +++++++++++++++++
 libc/src/stdlib/a64l.h                   | 20 ++++++
 libc/test/src/stdlib/CMakeLists.txt      | 10 +++
 libc/test/src/stdlib/a64l_test.cpp       | 87 ++++++++++++++++++++++++
 7 files changed, 199 insertions(+)
 create mode 100644 libc/src/stdlib/a64l.cpp
 create mode 100644 libc/src/stdlib/a64l.h
 create mode 100644 libc/test/src/stdlib/a64l_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index eaceb15c47291..22f747f24d92a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -177,6 +177,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.stdbit.stdc_trailing_zeros_us
 
     # stdlib.h entrypoints
+    libc.src.stdlib.a64l
     libc.src.stdlib.abs
     libc.src.stdlib.atof
     libc.src.stdlib.atoi
diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml
index 8d2b3f357e1a9..b308df98a6090 100644
--- a/libc/include/stdlib.yaml
+++ b/libc/include/stdlib.yaml
@@ -24,6 +24,12 @@ functions:
     return_type: _Noreturn void
     arguments:
       - type: int
+  - name: a64l
+    standards:
+      - posix
+    return_type: long
+    arguments:
+      - type: const char *
   - name: abort
     standards:
       - stdc
diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt
index 73a9fbf1e2ddc..361f2305358c9 100644
--- a/libc/src/stdlib/CMakeLists.txt
+++ b/libc/src/stdlib/CMakeLists.txt
@@ -184,6 +184,17 @@ add_entrypoint_object(
     libc.src.__support.str_to_integer
 )
 
+add_entrypoint_object(
+  a64l
+  SRCS
+    a64l.cpp
+  HDRS
+    a64l.h
+  DEPENDS
+    libc.src.__support.ctype_utils
+    libc.hdr.types.size_t
+)
+
 add_entrypoint_object(
   abs
   SRCS
diff --git a/libc/src/stdlib/a64l.cpp b/libc/src/stdlib/a64l.cpp
new file mode 100644
index 0000000000000..670ac43804bff
--- /dev/null
+++ b/libc/src/stdlib/a64l.cpp
@@ -0,0 +1,64 @@
+//===-- Implementation of a64l --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/a64l.h"
+#include "hdr/types/size_t.h"
+#include "src/__support/common.h"
+#include "src/__support/ctype_utils.h"
+#include "src/__support/macros/config.h"
+
+#include <stdint.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Maps one character of the POSIX a64l alphabet to its digit value: '.' is 0,
+// '/' is 1, '0'-'9' are [2,11], 'A'-'Z' are [12,37], 'a'-'z' are [38,63].
+// Returns -1 for any other character. Defined here rather than in ctype_utils
+// since this digit ordering is unusual and implementation specific.
+constexpr int32_t b64_char_to_int(char ch) {
+  switch (ch) {
+  case '.':
+    return 0;
+  case '/':
+    return 1;
+  default:
+    break;
+  }
+  // Anything that is not a letter or decimal digit has no specified value.
+  if (!internal::isalnum(ch))
+    return -1;
+  // b36_char_to_int is case insensitive, so add 2 to skip past '.' and '/'
+  // and add 26 for lowercase letters to restore case sensitivity.
+  const int32_t case_offset = internal::islower(ch) ? 26 : 0;
+  return internal::b36_char_to_int(ch) + 2 + case_offset;
+}
+
+// Decodes up to six base-64 digits from `s`, least significant digit first,
+// into a 32-bit value and returns it sign extended to long.
+LLVM_LIBC_FUNCTION(long, a64l, (const char *s)) {
+  // the standard says to only use up to 6 characters.
+  constexpr size_t MAX_LENGTH = 6;
+  // Accumulate unsigned: the sixth digit is shifted by 30 bits, which would
+  // overflow a signed 32-bit int (undefined behavior before C++20).
+  uint32_t result = 0;
+
+  for (size_t i = 0; i < MAX_LENGTH && s[i] != '\0'; ++i) {
+    const int32_t cur_val = b64_char_to_int(s[i]);
+    // The standard says what happens on an unspecified character is undefined,
+    // here we treat it as the end of the string.
+    if (cur_val == -1)
+      break;
+    // Digit i fills bits [6i, 6i+5]; bits shifted past bit 31 are discarded.
+    result |= static_cast<uint32_t>(cur_val) << (6 * i);
+  }
+
+  // standard says to sign extend from 32 bits.
+  return static_cast<long>(static_cast<int32_t>(result));
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/stdlib/a64l.h b/libc/src/stdlib/a64l.h
new file mode 100644
index 0000000000000..024be058f756c
--- /dev/null
+++ b/libc/src/stdlib/a64l.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for a64l --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STDLIB_A64L_H
+#define LLVM_LIBC_SRC_STDLIB_A64L_H
+
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+long a64l(const char *s);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STDLIB_A64L_H
diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt
index e6c8a629c71fa..848100442c88b 100644
--- a/libc/test/src/stdlib/CMakeLists.txt
+++ b/libc/test/src/stdlib/CMakeLists.txt
@@ -221,6 +221,16 @@ add_libc_test(
     ${strfrom_test_copts}
 )
 
+add_libc_test(
+  a64l_test
+  SUITE
+    libc-stdlib-tests
+  SRCS
+    a64l_test.cpp
+  DEPENDS
+    libc.src.stdlib.a64l
+)
+
 add_libc_test(
   abs_test
   SUITE
diff --git a/libc/test/src/stdlib/a64l_test.cpp b/libc/test/src/stdlib/a64l_test.cpp
new file mode 100644
index 0000000000000..acdef5d69543d
--- /dev/null
+++ b/libc/test/src/stdlib/a64l_test.cpp
@@ -0,0 +1,87 @@
+//===-- Unittests for a64l ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/stdlib/a64l.h"
+#include "test/UnitTest/Test.h"
+
+TEST(LlvmLibcA64lTest, EmptyString) { ASSERT_EQ(LIBC_NAMESPACE::a64l(""), 0L); }
+TEST(LlvmLibcA64lTest, FullString) {
+  ASSERT_EQ(LIBC_NAMESPACE::a64l("AbC12/"), 1141696972L);
+}
+
+constexpr char B64_CHARS[64] = {
+    '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A',
+    'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
+    'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a',
+    'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+    'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+};
+
+TEST(LlvmLibcA64lTest, OneCharacter) {
+  // A lone digit decodes to its own index in the base-64 alphabet.
+  for (size_t digit = 0; digit < 64; ++digit) {
+    const char input[2] = {B64_CHARS[digit], '\0'};
+    const long expected = static_cast<long>(digit);
+    ASSERT_EQ(LIBC_NAMESPACE::a64l(input), expected);
+  }
+}
+
+TEST(LlvmLibcA64lTest, TwoCharacters) {
+  // The first character is the least significant digit, so a two character
+  // string decodes to low + 64 * high.
+  for (size_t low = 0; low < 64; ++low) {
+    const char low_digit = B64_CHARS[low];
+    for (size_t high = 0; high < 64; ++high) {
+      const char input[3] = {low_digit, B64_CHARS[high], '\0'};
+      const long expected = static_cast<long>(low + (high * 64));
+
+      ASSERT_EQ(LIBC_NAMESPACE::a64l(input), expected);
+    }
+  }
+}
+
+TEST(LlvmLibcA64lTest, FiveSameCharacters) {
+  // Only five digits (30 bits) are used here. A sixth digit would contribute
+  // just the top two bits of the 32-bit result; that case is not handled here.
+  char example_str[6] = {
+      '\0', '\0', '\0', '\0', '\0', '\0',
+  };
+
+  // bits 0, 6, 12, 18 and 24 set: multiplying copies a digit into each slot.
+  const long BASE_NUM = 0b1000001000001000001000001;
+
+  for (size_t char_val = 0; char_val < 64; ++char_val) {
+    for (size_t i = 0; i < 5; ++i)
+      example_str[i] = B64_CHARS[char_val];
+
+    const long expected_result = BASE_NUM * char_val;
+
+    ASSERT_EQ(LIBC_NAMESPACE::a64l(example_str), expected_result);
+  }
+}
+
+TEST(LlvmLibcA64lTest, OneOfSixCharacters) {
+  char example_str[7] = {'\0', '\0', '\0', '\0', '\0', '\0', '\0'};
+
+  for (size_t cur_char = 0; cur_char < 6; ++cur_char) {
+    // Reset all six characters to '.', the digit with value 0.
+    for (size_t i = 0; i < 6; ++i)
+      example_str[i] = B64_CHARS[0];
+
+    for (size_t char_val = 0; char_val < 64; ++char_val) {
+      example_str[cur_char] = B64_CHARS[char_val];
+
+      // Truncate to 32 bits and reinterpret as signed, since the standard
+      // says the result is sign extended from 32 bits.
+      const long expected_result =
+          static_cast<int32_t>(char_val << (6 * cur_char));
+
+      ASSERT_EQ(LIBC_NAMESPACE::a64l(example_str), expected_result);
+    }
+  }
+}



More information about the libc-commits mailing list