[libc-commits] [libc] [libc] Add wcsxfrm (PR #191692)

Arya C S via libc-commits libc-commits at lists.llvm.org
Thu Apr 16 04:26:27 PDT 2026


https://github.com/AryaCS111 updated https://github.com/llvm/llvm-project/pull/191692

>From 46bdc34770f2e0e2f9419ff54bc076bcce6238a5 Mon Sep 17 00:00:00 2001
From: "arya.cs" <arya.cs at blackfigtech.com>
Date: Sun, 12 Apr 2026 12:25:11 +0530
Subject: [PATCH 1/3] [libc] Add wcsxfrm

---
 libc/config/linux/x86_64/entrypoints.txt |  1 +
 libc/include/wchar.yaml                  |  8 ++
 libc/src/wchar/CMakeLists.txt            |  8 ++
 libc/src/wchar/wcsxfrm.cpp               | 51 ++++++++++++
 libc/src/wchar/wcsxfrm.h                 | 16 ++++
 libc/test/src/wchar/CMakeLists.txt       | 10 +++
 libc/test/src/wchar/wcsxfrm_test.cpp     | 98 ++++++++++++++++++++++++
 7 files changed, 192 insertions(+)
 create mode 100644 libc/src/wchar/wcsxfrm.cpp
 create mode 100644 libc/src/wchar/wcsxfrm.h
 create mode 100644 libc/test/src/wchar/wcsxfrm_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 92324118a51c8..d1bac208ffd5e 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -399,6 +399,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wmemset
     libc.src.wchar.wcschr
     libc.src.wchar.wcsncmp
+    libc.src.wchar.wcsxfrm
     libc.src.wchar.wcscmp
     libc.src.wchar.wcspbrk
     libc.src.wchar.wcsrchr
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 1bd829dc5efd6..6d6fc26c99fe9 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -354,3 +354,11 @@ functions:
     arguments:
       - type: const wchar_t *__restrict
       - type: wchar_t **__restrict
+  - name: wcsxfrm
+    standards:
+      - stdc
+    return_type: size_t
+    arguments:
+      - type: wchar_t *__restrict
+      - type: const wchar_t *__restrict
+      - type: size_t
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index ce57199b0837a..34283b4632c1d 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -543,3 +543,11 @@ add_entrypoint_object(
     libc.hdr.types.size_t
     libc.hdr.wchar_macros
 )
+
+add_entrypoint_object(
+  wcsxfrm
+  SRCS
+    wcsxfrm.cpp
+  HDRS
+    wcsxfrm.h
+)
diff --git a/libc/src/wchar/wcsxfrm.cpp b/libc/src/wchar/wcsxfrm.cpp
new file mode 100644
index 0000000000000..6792540b94f23
--- /dev/null
+++ b/libc/src/wchar/wcsxfrm.cpp
@@ -0,0 +1,51 @@
+//===-- Implementation of wcsxfrm ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsxfrm.h"
+
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// TODO: Add support for locale-aware collation keys.
+// For now, this implements C/POSIX-like behavior: the transformed form is the
+// original wide string itself, so comparing transformed strings with wcscmp
+// matches code-point order.
+LLVM_LIBC_FUNCTION(size_t, wcsxfrm,
+                   (wchar_t *__restrict dest, const wchar_t *__restrict src,
+                    size_t n)) {
+  // Number of source characters that may be written before the trailing NUL.
+  const size_t write_limit = n > 0 ? n - 1 : 0;
+
+  size_t i = 0;
+
+  // Single pass over the prefix we might need to copy.
+  // This avoids a full wcslen(src) pass for the common case where the source
+  // fits in the destination buffer.
+  for (; i < write_limit; ++i) {
+    const wchar_t ch = src[i];
+    if (ch == L'\0') {
+      dest[i] = L'\0';
+      return i;
+    }
+    dest[i] = ch;
+  }
+
+  // If n > 0, always NUL-terminate. This is correct both when truncating and
+  // when write_limit == 0 (i.e. n == 1).
+  if (n > 0)
+    dest[write_limit] = L'\0';
+
+  // Finish counting the remaining source length if we truncated or if n == 0.
+  while (src[i] != L'\0')
+    ++i;
+
+  return i;
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcsxfrm.h b/libc/src/wchar/wcsxfrm.h
new file mode 100644
index 0000000000000..653633227f1f5
--- /dev/null
+++ b/libc/src/wchar/wcsxfrm.h
@@ -0,0 +1,16 @@
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
+#define LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
+
+#include "src/__support/macros/config.h"
+
+#include <stddef.h>
+#include <wchar.h>
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t wcsxfrm(wchar_t *__restrict dest, const wchar_t *__restrict src,
+               size_t n);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 7a7cfaee7f367..50359a37e212c 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -517,3 +517,13 @@ add_libc_test(
     libc.src.wchar.wcstold
     libc.test.UnitTest.ErrnoCheckingTest
 )
+
+add_libc_unittest(
+  wcsxfrm_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcsxfrm_test.cpp
+  DEPENDS
+    libc.src.wchar.wcsxfrm
+)
diff --git a/libc/test/src/wchar/wcsxfrm_test.cpp b/libc/test/src/wchar/wcsxfrm_test.cpp
new file mode 100644
index 0000000000000..5470d3bb79f71
--- /dev/null
+++ b/libc/test/src/wchar/wcsxfrm_test.cpp
@@ -0,0 +1,98 @@
+//===-- Unittests for wcsxfrm --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcsxfrm.h"
+#include "test/UnitTest/Test.h"
+
+#define EXPECT_WCHAR_EQ(ACTUAL, EXPECTED)                                      \
+  EXPECT_EQ(static_cast<int>(ACTUAL), static_cast<int>(EXPECTED))
+
+TEST(LlvmLibcWCSXfrmTest, EmptyString) {
+  wchar_t dest[8];
+  size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"", 8);
+
+  EXPECT_EQ(result, size_t(0));
+  EXPECT_WCHAR_EQ(dest[0], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, NullDestinationWhenCountIsZero) {
+  size_t result = LIBC_NAMESPACE::wcsxfrm(nullptr, L"abc", 0);
+  EXPECT_EQ(result, size_t(3));
+}
+
+TEST(LlvmLibcWCSXfrmTest, CopiesWholeStringWhenBufferIsLargeEnough) {
+  wchar_t dest[16];
+  size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 16);
+
+  EXPECT_EQ(result, size_t(5));
+  EXPECT_WCHAR_EQ(dest[0], L'h');
+  EXPECT_WCHAR_EQ(dest[1], L'e');
+  EXPECT_WCHAR_EQ(dest[2], L'l');
+  EXPECT_WCHAR_EQ(dest[3], L'l');
+  EXPECT_WCHAR_EQ(dest[4], L'o');
+  EXPECT_WCHAR_EQ(dest[5], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, ExactFitIncludingNullTerminator) {
+  wchar_t dest[6];
+  size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 6);
+
+  EXPECT_EQ(result, size_t(5));
+  EXPECT_WCHAR_EQ(dest[0], L'h');
+  EXPECT_WCHAR_EQ(dest[1], L'e');
+  EXPECT_WCHAR_EQ(dest[2], L'l');
+  EXPECT_WCHAR_EQ(dest[3], L'l');
+  EXPECT_WCHAR_EQ(dest[4], L'o');
+  EXPECT_WCHAR_EQ(dest[5], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, TruncatesAndNullTerminates) {
+  wchar_t dest[4];
+  size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 4);
+
+  EXPECT_EQ(result, size_t(5));
+  EXPECT_WCHAR_EQ(dest[0], L'h');
+  EXPECT_WCHAR_EQ(dest[1], L'e');
+  EXPECT_WCHAR_EQ(dest[2], L'l');
+  EXPECT_WCHAR_EQ(dest[3], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, BufferSizeOneWritesOnlyNullTerminator) {
+  wchar_t dest[1];
+  dest[0] = L'x';
+
+  size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 1);
+
+  EXPECT_EQ(result, size_t(5));
+  EXPECT_WCHAR_EQ(dest[0], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, DoesNotWriteWhenCountIsZero) {
+  wchar_t dest[4] = {L'x', L'y', L'z', L'\0'};
+
+  size_t result = LIBC_NAMESPACE::wcsxfrm(dest, L"hello", 0);
+
+  EXPECT_EQ(result, size_t(5));
+  EXPECT_WCHAR_EQ(dest[0], L'x');
+  EXPECT_WCHAR_EQ(dest[1], L'y');
+  EXPECT_WCHAR_EQ(dest[2], L'z');
+  EXPECT_WCHAR_EQ(dest[3], L'\0');
+}
+
+TEST(LlvmLibcWCSXfrmTest, WideCharactersAreHandledCorrectly) {
+  wchar_t dest[8];
+  const wchar_t src[] = {L'A', L'\u03A9', L'\u2603', L'\0'};
+
+  size_t result = LIBC_NAMESPACE::wcsxfrm(dest, src, 8);
+
+  EXPECT_EQ(result, size_t(3));
+  EXPECT_WCHAR_EQ(dest[0], L'A');
+  EXPECT_WCHAR_EQ(dest[1], L'\u03A9');
+  EXPECT_WCHAR_EQ(dest[2], L'\u2603');
+  EXPECT_WCHAR_EQ(dest[3], L'\0');
+}

>From 39bb2e3e994a8e0e45915ae1ff8b7617bcaa5305 Mon Sep 17 00:00:00 2001
From: "arya.cs" <arya.cs at blackfigtech.com>
Date: Thu, 16 Apr 2026 09:56:33 +0530
Subject: [PATCH 2/3] [libc] Address review comments for wcsxfrm

---
 libc/src/wchar/CMakeLists.txt | 4 ++++
 libc/src/wchar/wcsxfrm.cpp    | 3 +++
 libc/src/wchar/wcsxfrm.h      | 4 ++--
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 34283b4632c1d..55891fbf50eff 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -550,4 +550,8 @@ add_entrypoint_object(
     wcsxfrm.cpp
   HDRS
     wcsxfrm.h
+  DEPENDS
+    libc.hdr.types.size_t
+    libc.hdr.types.wchar_t
+    libc.src.__support.macros.config
 )
diff --git a/libc/src/wchar/wcsxfrm.cpp b/libc/src/wchar/wcsxfrm.cpp
index 6792540b94f23..37cda8eff0c7f 100644
--- a/libc/src/wchar/wcsxfrm.cpp
+++ b/libc/src/wchar/wcsxfrm.cpp
@@ -8,6 +8,9 @@
 
 #include "src/wchar/wcsxfrm.h"
 
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
 #include "src/__support/common.h"
 
 namespace LIBC_NAMESPACE_DECL {
diff --git a/libc/src/wchar/wcsxfrm.h b/libc/src/wchar/wcsxfrm.h
index 653633227f1f5..65fa47d73386a 100644
--- a/libc/src/wchar/wcsxfrm.h
+++ b/libc/src/wchar/wcsxfrm.h
@@ -3,8 +3,8 @@
 
 #include "src/__support/macros/config.h"
 
-#include <stddef.h>
-#include <wchar.h>
+#include "hdr/types/size_t.h"
+#include "hdr/types/wchar_t.h"
 
 namespace LIBC_NAMESPACE_DECL {
 

>From ff3d9225fbdd8c01d3c4a125e13838f8fa7f3c00 Mon Sep 17 00:00:00 2001
From: "arya.cs" <arya.cs at blackfigtech.com>
Date: Thu, 16 Apr 2026 16:55:16 +0530
Subject: [PATCH 3/3] [libc] Address review comments for wcsxfrm

---
 libc/src/wchar/CMakeLists.txt        | 1 +
 libc/src/wchar/wcsxfrm.h             | 8 ++++++++
 libc/test/src/wchar/wcsxfrm_test.cpp | 2 ++
 3 files changed, 11 insertions(+)

diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index 55891fbf50eff..fc273b0153a2b 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -554,4 +554,5 @@ add_entrypoint_object(
     libc.hdr.types.size_t
     libc.hdr.types.wchar_t
     libc.src.__support.macros.config
+    libc.src.__support.common
 )
diff --git a/libc/src/wchar/wcsxfrm.h b/libc/src/wchar/wcsxfrm.h
index 65fa47d73386a..7d24598f763d1 100644
--- a/libc/src/wchar/wcsxfrm.h
+++ b/libc/src/wchar/wcsxfrm.h
@@ -1,3 +1,11 @@
+//===-- Implementation header for wcsxfrm -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
 #ifndef LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
 #define LLVM_LIBC_SRC_WCHAR_WCSXFRM_H
 
diff --git a/libc/test/src/wchar/wcsxfrm_test.cpp b/libc/test/src/wchar/wcsxfrm_test.cpp
index 5470d3bb79f71..5d28d38448661 100644
--- a/libc/test/src/wchar/wcsxfrm_test.cpp
+++ b/libc/test/src/wchar/wcsxfrm_test.cpp
@@ -9,6 +9,8 @@
 #include "src/wchar/wcsxfrm.h"
 #include "test/UnitTest/Test.h"
 
+// TODO: Remove this once the test framework supports direct wchar_t
+// comparisons in EXPECT_EQ.
 #define EXPECT_WCHAR_EQ(ACTUAL, EXPECTED)                                      \
   EXPECT_EQ(static_cast<int>(ACTUAL), static_cast<int>(EXPECTED))
 



More information about the libc-commits mailing list