[libc-commits] [libc] [libc] Add wcsxfrm (PR #191692)
Arya C S via libc-commits
libc-commits at lists.llvm.org
Thu Apr 16 03:06:03 PDT 2026
https://github.com/AryaCS111 updated https://github.com/llvm/llvm-project/pull/191692
>From f2df9d5abbb8541f5e01e2b98f3b82a876624e53 Mon Sep 17 00:00:00 2001
From: "arya.cs" <arya.cs at blackfigtech.com>
Date: Sun, 12 Apr 2026 12:25:11 +0530
Subject: [PATCH 1/2] [libc] Add wcsxfrm
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/include/wchar.yaml | 8 ++
libc/src/wchar/CMakeLists.txt | 8 ++
libc/src/wchar/wcsxfrm.cpp | 51 ++++++++++++
libc/src/wchar/wcsxfrm.h | 16 ++++
libc/test/src/wchar/CMakeLists.txt | 10 +++
libc/test/src/wchar/wcsxfrm_test.cpp | 98 ++++++++++++++++++++++++
7 files changed, 192 insertions(+)
create mode 100644 libc/src/wchar/wcsxfrm.cpp
create mode 100644 libc/src/wchar/wcsxfrm.h
create mode 100644 libc/test/src/wchar/wcsxfrm_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 9476ebbad1517..139e15d726111 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -399,6 +399,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wmemset
libc.src.wchar.wcschr
libc.src.wchar.wcsncmp
+ libc.src.wchar.wcsxfrm
libc.src.wchar.wcscmp
libc.src.wchar.wcspbrk
libc.src.wchar.wcsrchr
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 1bd829dc5efd6..6d6fc26c99fe9 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -354,3 +354,11 @@ functions:
arguments:
- type: const wchar_t *__restrict
- type: wchar_t **__restrict
+ - name: wcsxfrm
+ standards:
+ - stdc
+ return_type: size_t
+ arguments:
+ - type: wchar_t *__restrict
+ - type: const wchar_t *__restrict
+ - type: size_t
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index ce57199b0837a..34283b4632c1d 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -543,3 +543,11 @@ add_entrypoint_object(
libc.hdr.types.size_t
libc.hdr.wchar_macros
)
+
+add_entrypoint_object(
+ wcsxfrm
+ SRCS
+ wcsxfrm.cpp
+ HDRS
+ wcsxfrm.h
+)
diff --git a/libc/src/wchar/wcsxfrm.cpp b/libc/src/wchar/wcsxfrm.cpp
new file mode 100644
index 0000000000000..6792540b94f23
--- /dev/null
+++ b/libc/src/wchar/wcsxfrm.cpp
@@ -0,0 +1,51 @@
+//===-- Implementation of wcsxfrm ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsxfrm.h"
+
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// TODO: Add support for locale-aware collation keys.
+// Until then the transformed form is a plain copy of src (truncated to fit n),
+// so wcscmp on fully transformed strings compares the original wchar_t values.
+// Returns the length of src, not counting the NUL; writes dest only if n > 0.
+LLVM_LIBC_FUNCTION(size_t, wcsxfrm,
+ (wchar_t *__restrict dest, const wchar_t *__restrict src,
+ size_t n)) {
+ // Room for copied characters: one slot of n is reserved for the NUL.
+ const size_t write_limit = n > 0 ? n - 1 : 0;
+
+ size_t i = 0;
+
+ // Copy and measure in the same pass. If src ends within the first
+ // write_limit characters, terminate dest there and return that length
+ // without scanning src a second time.
+ for (; i < write_limit; ++i) {
+ const wchar_t ch = src[i];
+ if (ch == L'\0') {
+ dest[i] = L'\0';
+ return i;
+ }
+ dest[i] = ch;
+ }
+
+ // Reached only when src did not end within write_limit characters. With
+ // n > 0, terminate at the last slot (index 0 when n == 1).
+ if (n > 0)
+ dest[write_limit] = L'\0';
+
+ // Keep counting past the copied prefix so the full src length is returned.
+ while (src[i] != L'\0')
+ ++i;
+
+ return i;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsxfrm.h b/libc/src/wchar/wcsxfrm.h
new file mode 100644
index 0000000000000..653633227f1f5
--- /dev/null
+++ b/libc/src/wchar/wcsxfrm.h
@@ -0,0 +1,16 @@
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
+#define LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
+
+#include "src/__support/macros/config.h"
+
+#include <stddef.h>
+#include <wchar.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsxfrm(wchar_t *__restrict dest, const wchar_t *__restrict src,
+ size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 7a7cfaee7f367..50359a37e212c 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -517,3 +517,13 @@ add_libc_test(
libc.src.wchar.wcstold
libc.test.UnitTest.ErrnoCheckingTest
)
+
+add_libc_unittest(
+ wcsxfrm_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcsxfrm_test.cpp
+ DEPENDS
+ libc.src.wchar.wcsxfrm
+)
diff --git a/libc/test/src/wchar/wcsxfrm_test.cpp b/libc/test/src/wchar/wcsxfrm_test.cpp
new file mode 100644
index 0000000000000..5470d3bb79f71
--- /dev/null
+++ b/libc/test/src/wchar/wcsxfrm_test.cpp
@@ -0,0 +1,98 @@
+//===-- Unittests for wcsxfrm --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsxfrm.h"
+#include "test/UnitTest/Test.h"
+
+#define EXPECT_WCHAR_EQ(ACTUAL, EXPECTED) \
+ EXPECT_EQ(static_cast<int>(ACTUAL), static_cast<int>(EXPECTED))
+
+TEST(LlvmLibcWCSXfrmTest, EmptyString) {
+ wchar_t dest[8];
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"", 8);
+
+ EXPECT_EQ(result, size_t(0));
+ EXPECT_WCHAR_EQ(dest[0], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, NullDestinationWhenCountIsZero) {
+ size_t result = LIBC_NAMESPACE::wcsxfrm(nullptr, L"abc", 0);
+ EXPECT_EQ(result, size_t(3));
+}
+
+TEST(LlvmLibcWCSXfrmTest, CopiesWholeStringWhenBufferIsLargeEnough) {
+ wchar_t dest[16];
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 16);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'h');
+ EXPECT_WCHAR_EQ(dest[1], L'e');
+ EXPECT_WCHAR_EQ(dest[2], L'l');
+ EXPECT_WCHAR_EQ(dest[3], L'l');
+ EXPECT_WCHAR_EQ(dest[4], L'o');
+ EXPECT_WCHAR_EQ(dest[5], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, ExactFitIncludingNullTerminator) {
+ wchar_t dest[6];
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 6);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'h');
+ EXPECT_WCHAR_EQ(dest[1], L'e');
+ EXPECT_WCHAR_EQ(dest[2], L'l');
+ EXPECT_WCHAR_EQ(dest[3], L'l');
+ EXPECT_WCHAR_EQ(dest[4], L'o');
+ EXPECT_WCHAR_EQ(dest[5], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, TruncatesAndNullTerminates) {
+ wchar_t dest[4];
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 4);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'h');
+ EXPECT_WCHAR_EQ(dest[1], L'e');
+ EXPECT_WCHAR_EQ(dest[2], L'l');
+ EXPECT_WCHAR_EQ(dest[3], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, BufferSizeOneWritesOnlyNullTerminator) {
+ wchar_t dest[1];
+ dest[0] = L'x';
+
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 1);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, DoesNotWriteWhenCountIsZero) {
+ wchar_t dest[4] = {L'x', L'y', L'z', L'\0'};
+
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 0);
+
+ EXPECT_EQ(result, size_t(5));
+ EXPECT_WCHAR_EQ(dest[0], L'x');
+ EXPECT_WCHAR_EQ(dest[1], L'y');
+ EXPECT_WCHAR_EQ(dest[2], L'z');
+ EXPECT_WCHAR_EQ(dest[3], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, WideCharactersAreHandledCorrectly) {
+ wchar_t dest[8];
+ const wchar_t src[] = {L'A', L'\u03A9', L'\u2603', L'\0'};
+
+ size_t result = LIBC_NAMESPACE::wcsxfrm(dest, src, 8);
+
+ EXPECT_EQ(result, size_t(3));
+ EXPECT_WCHAR_EQ(dest[0], L'A');
+ EXPECT_WCHAR_EQ(dest[1], L'\u03A9');
+ EXPECT_WCHAR_EQ(dest[2], L'\u2603');
+ EXPECT_WCHAR_EQ(dest[3], L'\0');
+}
>From 7eb94e7c666f23d2ef602b13986960c637b7a406 Mon Sep 17 00:00:00 2001
From: "arya.cs" <arya.cs at blackfigtech.com>
Date: Thu, 16 Apr 2026 09:56:33 +0530
Subject: [PATCH 2/2] [libc] Address review comments for wcsxfrm
---
libc/src/wchar/CMakeLists.txt | 4 ++++
libc/src/wchar/wcsxfrm.cpp | 3 +++
libc/src/wchar/wcsxfrm.h | 4 ++--
3 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 34283b4632c1d..55891fbf50eff 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -550,4 +550,8 @@ add_entrypoint_object(
wcsxfrm.cpp
HDRS
wcsxfrm.h
+ DEPENDS
+ libc.hdr.types.size_t
+ libc.hdr.types.wchar_t
+ libc.src.__support.macros.config
)
diff --git a/libc/src/wchar/wcsxfrm.cpp b/libc/src/wchar/wcsxfrm.cpp
index 6792540b94f23..37cda8eff0c7f 100644
--- a/libc/src/wchar/wcsxfrm.cpp
+++ b/libc/src/wchar/wcsxfrm.cpp
@@ -8,6 +8,9 @@
#include "src/wchar/wcsxfrm.h"
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
#include "src/__support/common.h"
namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/wchar/wcsxfrm.h b/libc/src/wchar/wcsxfrm.h
index 653633227f1f5..65fa47d73386a 100644
--- a/libc/src/wchar/wcsxfrm.h
+++ b/libc/src/wchar/wcsxfrm.h
@@ -3,8 +3,8 @@
#include "src/__support/macros/config.h"
-#include <stddef.h>
-#include <wchar.h>
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
namespace LIBC_NAMESPACE_DECL {
More information about the libc-commits
mailing list