[libc-commits] [libc] [libc] Add support for 'string.h' locale variants (PR #105719)

Joseph Huber via libc-commits libc-commits at lists.llvm.org
Thu Aug 29 12:20:14 PDT 2024


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/105719

>From 2da2d14b8b4370fbc5f91c27d1ed7d3290491141 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Sun, 11 Aug 2024 08:34:09 -0500
Subject: [PATCH] [libc] Add support for 'string.h' locale variants

Summary:
This adds the locale variants of the 'string.h' functions, strcoll_l and
strxfrm_l. As with the previously added locale variants, these ignore the
locale argument and simply duplicate the non-locale implementations, which
assume the "C" locale.
---
 libc/config/gpu/entrypoints.txt          |  2 ++
 libc/config/linux/x86_64/entrypoints.txt |  4 ++++
 libc/include/string.h.def                |  1 +
 libc/newhdrgen/yaml/string.yaml          | 17 ++++++++++++++
 libc/spec/stdc.td                        | 13 +++++++++++
 libc/src/string/CMakeLists.txt           | 19 ++++++++++++++++
 libc/src/string/strcoll_l.cpp            | 24 ++++++++++++++++++++
 libc/src/string/strcoll_l.h              | 21 ++++++++++++++++++
 libc/src/string/strxfrm_l.cpp            | 28 ++++++++++++++++++++++++
 libc/src/string/strxfrm_l.h              | 23 +++++++++++++++++++
 10 files changed, 152 insertions(+)
 create mode 100644 libc/src/string/strcoll_l.cpp
 create mode 100644 libc/src/string/strcoll_l.h
 create mode 100644 libc/src/string/strxfrm_l.cpp
 create mode 100644 libc/src/string/strxfrm_l.h

diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt
index d8f78f0d174534..706f603b6ff56f 100644
--- a/libc/config/gpu/entrypoints.txt
+++ b/libc/config/gpu/entrypoints.txt
@@ -58,6 +58,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.strchrnul
     libc.src.string.strcmp
     libc.src.string.strcoll
+    libc.src.string.strcoll_l
     libc.src.string.strcpy
     libc.src.string.strcspn
     libc.src.string.strdup
@@ -79,6 +80,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.string.strtok
     libc.src.string.strtok_r
     libc.src.string.strxfrm
+    libc.src.string.strxfrm_l
 
     # stdbit.h entrypoints
     libc.src.stdbit.stdc_bit_ceil_uc
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 0aa38c7afc76f4..3fd88fc0020e55 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -809,6 +809,10 @@ if(LLVM_LIBC_FULL_BUILD)
     libc.src.stdlib.strtoul_l
     libc.src.stdlib.strtoull_l
 
+    # string.h entrypoints
+    libc.src.string.strcoll_l
+    libc.src.string.strxfrm_l
+
     # assert.h entrypoints
     libc.src.assert.__assert_fail
 
diff --git a/libc/include/string.h.def b/libc/include/string.h.def
index 1bd2687db2beac..e180f0d2561d3a 100644
--- a/libc/include/string.h.def
+++ b/libc/include/string.h.def
@@ -11,6 +11,7 @@
 
 #include "__llvm-libc-common.h"
 
+#include "llvm-libc-types/locale_t.h"
 #include "llvm-libc-macros/null-macro.h"
 
 %%public_api()
diff --git a/libc/newhdrgen/yaml/string.yaml b/libc/newhdrgen/yaml/string.yaml
index 1d6e64bfb9cf60..af1750e91243ea 100644
--- a/libc/newhdrgen/yaml/string.yaml
+++ b/libc/newhdrgen/yaml/string.yaml
@@ -144,6 +144,14 @@ functions:
     arguments:
       - type: const char *
       - type: const char *
+  - name: strcoll_l
+    standards:
+      - stdc
+    return_type: int
+    arguments:
+      - type: const char *
+      - type: const char *
+      - type: locale_t
   - name: strcpy
     standards:
       - stdc
@@ -300,3 +308,12 @@ functions:
       - type: char *__restrict
       - type: const char *__restrict
       - type: size_t
+  - name: strxfrm_l
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: char *__restrict
+      - type: const char *__restrict
+      - type: size_t
+      - type: locale_t
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index 2c61cb9d952951..1742e1f7b0ef33 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -354,6 +354,11 @@ def StdC : StandardSpec<"stdc"> {
               RetValSpec<IntType>,
               [ArgSpec<ConstCharPtr>, ArgSpec<ConstCharPtr>]
           >,
+          FunctionSpec<
+              "strcoll_l",
+              RetValSpec<IntType>,
+              [ArgSpec<ConstCharPtr>, ArgSpec<ConstCharPtr>, ArgSpec<LocaleT>]
+          >,
           FunctionSpec<
               "strncmp",
               RetValSpec<IntType>,
@@ -366,6 +371,14 @@ def StdC : StandardSpec<"stdc"> {
                ArgSpec<ConstCharRestrictedPtr>,
                ArgSpec<SizeTType>]
           >,
+          FunctionSpec<
+              "strxfrm_l",
+              RetValSpec<SizeTType>,
+              [ArgSpec<CharRestrictedPtr>,
+               ArgSpec<ConstCharRestrictedPtr>,
+               ArgSpec<SizeTType>,
+               ArgSpec<LocaleT>]
+          >,
           FunctionSpec<
               "strchr",
               RetValSpec<CharPtr>,
diff --git a/libc/src/string/CMakeLists.txt b/libc/src/string/CMakeLists.txt
index 56588ffafb86f0..787188ab3beb91 100644
--- a/libc/src/string/CMakeLists.txt
+++ b/libc/src/string/CMakeLists.txt
@@ -200,6 +200,14 @@ add_entrypoint_object(
     strcoll.h
 )
 
+add_entrypoint_object(
+  strcoll_l
+  SRCS
+    strcoll_l.cpp
+  HDRS
+    strcoll_l.h
+)
+
 add_entrypoint_object(
   strcpy
   SRCS
@@ -441,6 +449,17 @@ add_entrypoint_object(
     .memory_utils.inline_memcpy
 )
 
+add_entrypoint_object(
+  strxfrm_l
+  SRCS
+    strxfrm_l.cpp
+  HDRS
+    strxfrm_l.h
+  DEPENDS
+    .string_utils
+    .memory_utils.inline_memcpy
+)
+
 add_entrypoint_object(
   memset_explicit
   SRCS
diff --git a/libc/src/string/strcoll_l.cpp b/libc/src/string/strcoll_l.cpp
new file mode 100644
index 00000000000000..f664a3c7c03f37
--- /dev/null
+++ b/libc/src/string/strcoll_l.cpp
@@ -0,0 +1,24 @@
+//===-- Implementation of strcoll_l ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strcoll_l.h"
+
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// TODO: Add support for locales; the locale argument is currently ignored.
+LLVM_LIBC_FUNCTION(int, strcoll_l,
+                   (const char *left, const char *right, locale_t)) {
+  for (; *left && *left == *right; ++left, ++right)
+    ; // NOTE(review): result below uses plain (maybe signed) char; confirm.
+  return static_cast<int>(*left) - static_cast<int>(*right);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/strcoll_l.h b/libc/src/string/strcoll_l.h
new file mode 100644
index 00000000000000..97230fb811236c
--- /dev/null
+++ b/libc/src/string/strcoll_l.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for strcoll_l ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRCOLL_L_H
+#define LLVM_LIBC_SRC_STRING_STRCOLL_L_H
+
+#include "include/llvm-libc-types/locale_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int strcoll_l(const char *left, const char *right, locale_t locale);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_STRCOLL_L_H
diff --git a/libc/src/string/strxfrm_l.cpp b/libc/src/string/strxfrm_l.cpp
new file mode 100644
index 00000000000000..ae758e1fcba6d8
--- /dev/null
+++ b/libc/src/string/strxfrm_l.cpp
@@ -0,0 +1,28 @@
+//===-- Implementation of strxfrm_l ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/string/strxfrm_l.h"
+#include "src/__support/macros/config.h"
+#include "src/string/memory_utils/inline_memcpy.h"
+#include "src/string/string_utils.h"
+
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// TODO: Add support for locales; the locale argument is currently ignored.
+LLVM_LIBC_FUNCTION(size_t, strxfrm_l,
+                   (char *__restrict dest, const char *__restrict src, size_t n,
+                    locale_t)) {
+  size_t len = internal::string_length(src);
+  if (n > len) // Copy the len + 1 bytes only when they fit within n.
+    inline_memcpy(dest, src, len + 1);
+  return len;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/strxfrm_l.h b/libc/src/string/strxfrm_l.h
new file mode 100644
index 00000000000000..af0f181601184b
--- /dev/null
+++ b/libc/src/string/strxfrm_l.h
@@ -0,0 +1,23 @@
+//===-- Implementation header for strxfrm_l ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_STRXFRM_L_H
+#define LLVM_LIBC_SRC_STRING_STRXFRM_L_H
+
+#include "include/llvm-libc-types/locale_t.h"
+#include "src/__support/macros/config.h"
+#include <stddef.h> // For size_t
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t strxfrm_l(char *__restrict dest, const char *__restrict src, size_t n,
+                 locale_t locale);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_STRXFRM_L_H



More information about the libc-commits mailing list