[libc-commits] [libc] [libc] Implement wcscoll (PR #192778)

Hardik Chona via libc-commits libc-commits at lists.llvm.org
Thu Apr 23 10:49:12 PDT 2026


https://github.com/un-pixelated updated https://github.com/llvm/llvm-project/pull/192778

>From bb350ddb9f7c65e6e5fa8b782f158bbdc0c112ba Mon Sep 17 00:00:00 2001
From: Hardik Chona <masterhc321 at gmail.com>
Date: Sat, 18 Apr 2026 11:43:23 +0530
Subject: [PATCH] [libc] Implement wcscoll

Implemented wcscoll.h

Updated yaml for hdrgen

Updated wchar cmakelists

updated linux/x86_64 entrypoints.txt

added unit tests

fixed mismatched directory

Updated CMakeLists.txt for wcscoll

nit: clang formatting

Add newlines to eof
---
 libc/config/linux/x86_64/entrypoints.txt |   1 +
 libc/include/wchar.yaml                  |   9 +-
 libc/src/wchar/CMakeLists.txt            |  10 ++
 libc/src/wchar/wcscoll.cpp               |  29 +++++
 libc/src/wchar/wcscoll.h                 |  21 ++++
 libc/test/src/wchar/CMakeLists.txt       |  10 ++
 libc/test/src/wchar/wcscoll_test.cpp     | 139 +++++++++++++++++++++++
 7 files changed, 218 insertions(+), 1 deletion(-)
 create mode 100644 libc/src/wchar/wcscoll.cpp
 create mode 100644 libc/src/wchar/wcscoll.h
 create mode 100644 libc/test/src/wchar/wcscoll_test.cpp

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index eeaccc863dbc3..2f04dc8138e8a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -399,6 +399,7 @@ set(TARGET_LIBC_ENTRYPOINTS
     libc.src.wchar.wcschr
     libc.src.wchar.wcsncmp
     libc.src.wchar.wcscmp
+    libc.src.wchar.wcscoll
     libc.src.wchar.wcspbrk
     libc.src.wchar.wcsrchr
     libc.src.wchar.wcsspn
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 1bd829dc5efd6..34d315e1eaa36 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -1,6 +1,6 @@
 header: wchar.h
 standards:
-  - stdc 
+  - stdc
 macros:
   - macro_name: "NULL"
     macro_header: null-macro.h
@@ -124,6 +124,13 @@ functions:
     arguments:
       - type: const wchar_t *
       - type: const wchar_t *
+  - name: wcscoll
+    standards:
+      - stdc
+    return_type: int
+    arguments:
+      - type: const wchar_t *
+      - type: const wchar_t *
   - name: wcspbrk
     standards:
       - stdc
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index ce57199b0837a..2206e1b0120ce 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -332,6 +332,16 @@ add_entrypoint_object(
     libc.hdr.wchar_macros
 )
 
+add_entrypoint_object(
+  wcscoll
+  SRCS
+    wcscoll.cpp
+  HDRS
+    wcscoll.h
+  DEPENDS
+    libc.hdr.wchar_macros
+)
+
 add_entrypoint_object(
   wcsdup
   SRCS
diff --git a/libc/src/wchar/wcscoll.cpp b/libc/src/wchar/wcscoll.cpp
new file mode 100644
index 0000000000000..abe86e1c9109e
--- /dev/null
+++ b/libc/src/wchar/wcscoll.cpp
@@ -0,0 +1,29 @@
+//===-- Implementation of wcscoll -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcscoll.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// Orders s1 against s2 by wchar_t value; returns <0, 0 or >0 accordingly.
+// TODO: Add support for locales.
+LLVM_LIBC_FUNCTION(int, wcscoll, (const wchar_t *s1, const wchar_t *s2)) {
+  LIBC_CRASH_ON_NULLPTR(s1);
+  LIBC_CRASH_ON_NULLPTR(s2);
+  for (; *s1 && (*s1 == *s2); ++s1, ++s2)
+    ;
+  // Compare instead of subtracting: wchar_t differences can overflow int.
+  return (*s1 > *s2) - (*s1 < *s2);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcscoll.h b/libc/src/wchar/wcscoll.h
new file mode 100644
index 0000000000000..bd499e9f41ced
--- /dev/null
+++ b/libc/src/wchar/wcscoll.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcscoll ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSCOLL_H
+#define LLVM_LIBC_SRC_WCHAR_WCSCOLL_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int wcscoll(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSCOLL_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 7a7cfaee7f367..bb1ea28907126 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -242,6 +242,16 @@ add_libc_test(
     libc.src.wchar.wcscmp
 )
 
+add_libc_test(
+  wcscoll_test
+  SUITE
+    libc_wchar_unittests
+  SRCS
+    wcscoll_test.cpp
+  DEPENDS
+    libc.src.wchar.wcscoll
+)
+
 add_libc_test(
   wcspbrk_test
   SUITE
diff --git a/libc/test/src/wchar/wcscoll_test.cpp b/libc/test/src/wchar/wcscoll_test.cpp
new file mode 100644
index 0000000000000..b611d1205540c
--- /dev/null
+++ b/libc/test/src/wchar/wcscoll_test.cpp
@@ -0,0 +1,139 @@
+//===-- Unittests for wcscoll ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/signal_macros.h"
+#include "src/wchar/wcscoll.h"
+#include "test/UnitTest/Test.h"
+
+// TODO: Add more comprehensive tests once locale support is added.
+
+TEST(LlvmLibcWcscollTest, EmptyStringsShouldReturnZero) {
+  const wchar_t *s1 = L"";
+  const wchar_t *s2 = L"";
+  // Two empty strings must compare equal.
+  int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+  ASSERT_EQ(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, EmptyStringShouldNotEqualNonEmptyString) {
+  const wchar_t *empty = L"";
+  const wchar_t *s = L"abc";
+
+  // The empty string sorts before any non-empty string, so the result is < 0.
+  int result = LIBC_NAMESPACE::wcscoll(empty, s);
+  ASSERT_LT(result, 0);
+
+  // Swapping the operands must flip the sign of the result.
+  result = LIBC_NAMESPACE::wcscoll(s, empty);
+  ASSERT_GT(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, EqualStringsShouldReturnZero) {
+  const wchar_t *s1 = L"abc";
+  const wchar_t *s2 = L"abc";
+
+  // Strings with identical contents must compare equal.
+  int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+  ASSERT_EQ(result, 0);
+
+  // Equality is symmetric: swapping the operands still yields 0.
+  result = LIBC_NAMESPACE::wcscoll(s2, s1);
+  ASSERT_EQ(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, LexicographicalTest) {
+  const wchar_t *s1 = L"abc";
+  const wchar_t *s2 = L"def";
+
+  // The strings differ at the first character, so (abc, def) yields < 0.
+  int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+  ASSERT_LT(result, 0);
+
+  // With the operands swapped, (def, abc) yields > 0.
+  result = LIBC_NAMESPACE::wcscoll(s2, s1);
+  ASSERT_GT(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, NonAsciiTest) {
+  const wchar_t *s1 = L"AbCdEf__1230!! \u1111";
+  const wchar_t *s2 = L"AbCdEf__1230!! \u1111\u2222";
+  // s1 is a proper prefix of s2, so s1 must sort before s2.
+  int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+  ASSERT_LT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s2, s1);
+  ASSERT_GT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s1, s1);
+  ASSERT_EQ(result, 0);
+
+  // The empty string sorts before a string containing non-ASCII characters.
+  const wchar_t *empty = L"";
+  result = LIBC_NAMESPACE::wcscoll(empty, s1);
+  ASSERT_LT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s1, empty);
+  ASSERT_GT(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, EightDigitUCNTest) {
+  const wchar_t *s1 = L"abC\U0001F44D"; // U+1F44D (thumbs up)
+  const wchar_t *s2 = L"abC\U0001F44E"; // U+1F44E (thumbs down)
+  // The strings differ only in the final code point; U+1F44D < U+1F44E.
+  int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+  ASSERT_LT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s2, s1);
+  ASSERT_GT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s1, s1);
+  ASSERT_EQ(result, 0);
+
+  // The empty string sorts before a string ending in an 8-digit UCN.
+  const wchar_t *empty = L"";
+  result = LIBC_NAMESPACE::wcscoll(empty, s1);
+  ASSERT_LT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s1, empty);
+  ASSERT_GT(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, AsciiVsNonAsciiTest) {
+  const wchar_t *s1 = L"a";
+  const wchar_t *s2 = L"\uFFFF";
+  const wchar_t *s3 = L"\U0001000F";
+
+  // ASCII 'a' vs. U+FFFF (4-digit UCN): the ASCII string sorts first.
+  int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+  ASSERT_LT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s2, s1);
+  ASSERT_GT(result, 0);
+
+  // ASCII 'a' vs. U+1000F (8-digit UCN): the ASCII string sorts first.
+  result = LIBC_NAMESPACE::wcscoll(s1, s3);
+  ASSERT_LT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s3, s1);
+  ASSERT_GT(result, 0);
+
+  // U+FFFF vs. U+1000F; assumes wchar_t stores U+1000F as a single unit.
+  result = LIBC_NAMESPACE::wcscoll(s2, s3);
+  ASSERT_LT(result, 0);
+
+  result = LIBC_NAMESPACE::wcscoll(s3, s2);
+  ASSERT_GT(result, 0);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS)
+TEST(LlvmLibcWcscollTest, NULLCheck) {
+  // A null pointer in either argument position must terminate the process.
+  EXPECT_DEATH([] { LIBC_NAMESPACE::wcscoll(L"", nullptr); }, WITH_SIGNAL(-1));
+  EXPECT_DEATH([] { LIBC_NAMESPACE::wcscoll(nullptr, L""); }, WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS



More information about the libc-commits mailing list