[libc-commits] [libc] [libc] Implement wcscoll (PR #192778)
Hardik Chona via libc-commits
libc-commits at lists.llvm.org
Thu Apr 23 10:49:12 PDT 2026
https://github.com/un-pixelated updated https://github.com/llvm/llvm-project/pull/192778
>From bb350ddb9f7c65e6e5fa8b782f158bbdc0c112ba Mon Sep 17 00:00:00 2001
From: Hardik Chona <masterhc321 at gmail.com>
Date: Sat, 18 Apr 2026 11:43:23 +0530
Subject: [PATCH] [libc] Implement wcscoll
Implemented wcscoll.h
Updated yaml for hdrgen
Updated wchar cmakelists
updated linux/x86_64 entrypoints.txt
added unit tests
fixed mismatched directory
Updated CMakeLists.txt for wcscoll
nit: clang formatting
Add newlines to eof
---
libc/config/linux/x86_64/entrypoints.txt | 1 +
libc/include/wchar.yaml | 9 +-
libc/src/wchar/CMakeLists.txt | 10 ++
libc/src/wchar/wcscoll.cpp | 29 +++++
libc/src/wchar/wcscoll.h | 21 ++++
libc/test/src/wchar/CMakeLists.txt | 10 ++
libc/test/src/wchar/wcscoll_test.cpp | 139 +++++++++++++++++++++++
7 files changed, 218 insertions(+), 1 deletion(-)
create mode 100644 libc/src/wchar/wcscoll.cpp
create mode 100644 libc/src/wchar/wcscoll.h
create mode 100644 libc/test/src/wchar/wcscoll_test.cpp
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index eeaccc863dbc3..2f04dc8138e8a 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -399,6 +399,7 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.wchar.wcschr
libc.src.wchar.wcsncmp
libc.src.wchar.wcscmp
+ libc.src.wchar.wcscoll
libc.src.wchar.wcspbrk
libc.src.wchar.wcsrchr
libc.src.wchar.wcsspn
diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml
index 1bd829dc5efd6..34d315e1eaa36 100644
--- a/libc/include/wchar.yaml
+++ b/libc/include/wchar.yaml
@@ -1,6 +1,6 @@
header: wchar.h
standards:
- - stdc
+ - stdc
macros:
- macro_name: "NULL"
macro_header: null-macro.h
@@ -124,6 +124,13 @@ functions:
arguments:
- type: const wchar_t *
- type: const wchar_t *
+ - name: wcscoll
+ standards:
+ - stdc
+ return_type: int
+ arguments:
+ - type: const wchar_t *
+ - type: const wchar_t *
- name: wcspbrk
standards:
- stdc
diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt
index ce57199b0837a..2206e1b0120ce 100644
--- a/libc/src/wchar/CMakeLists.txt
+++ b/libc/src/wchar/CMakeLists.txt
@@ -332,6 +332,16 @@ add_entrypoint_object(
libc.hdr.wchar_macros
)
+add_entrypoint_object(
+ wcscoll
+ SRCS
+ wcscoll.cpp
+ HDRS
+ wcscoll.h
+ DEPENDS
+ libc.hdr.wchar_macros
+)
+
add_entrypoint_object(
wcsdup
SRCS
diff --git a/libc/src/wchar/wcscoll.cpp b/libc/src/wchar/wcscoll.cpp
new file mode 100644
index 0000000000000..abe86e1c9109e
--- /dev/null
+++ b/libc/src/wchar/wcscoll.cpp
@@ -0,0 +1,29 @@
+//===-- Implementation of wcscoll -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/wchar/wcscoll.h"
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/null_check.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+// TODO: Add support for locales; until then wide characters compare by value.
+LLVM_LIBC_FUNCTION(int, wcscoll, (const wchar_t *s1, const wchar_t *s2)) {
+ LIBC_CRASH_ON_NULLPTR(s1);
+ LIBC_CRASH_ON_NULLPTR(s2);
+
+ for (; *s1 && (*s1 == *s2); ++s1, ++s2)
+ ;
+ // Compare, don't subtract: a wchar_t difference can overflow or flip sign.
+ return (*s1 > *s2) - (*s1 < *s2);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/wchar/wcscoll.h b/libc/src/wchar/wcscoll.h
new file mode 100644
index 0000000000000..bd499e9f41ced
--- /dev/null
+++ b/libc/src/wchar/wcscoll.h
@@ -0,0 +1,21 @@
+//===-- Implementation header for wcscoll ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_WCHAR_WCSCOLL_H
+#define LLVM_LIBC_SRC_WCHAR_WCSCOLL_H
+
+#include "hdr/types/wchar_t.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+int wcscoll(const wchar_t *s1, const wchar_t *s2);
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_WCHAR_WCSCOLL_H
diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt
index 7a7cfaee7f367..bb1ea28907126 100644
--- a/libc/test/src/wchar/CMakeLists.txt
+++ b/libc/test/src/wchar/CMakeLists.txt
@@ -242,6 +242,16 @@ add_libc_test(
libc.src.wchar.wcscmp
)
+add_libc_test(
+ wcscoll_test
+ SUITE
+ libc_wchar_unittests
+ SRCS
+ wcscoll_test.cpp
+ DEPENDS
+ libc.src.wchar.wcscoll
+)
+
add_libc_test(
wcspbrk_test
SUITE
diff --git a/libc/test/src/wchar/wcscoll_test.cpp b/libc/test/src/wchar/wcscoll_test.cpp
new file mode 100644
index 0000000000000..b611d1205540c
--- /dev/null
+++ b/libc/test/src/wchar/wcscoll_test.cpp
@@ -0,0 +1,139 @@
+//===-- Unittests for wcscoll ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "hdr/signal_macros.h"
+#include "src/wchar/wcscoll.h"
+#include "test/UnitTest/Test.h"
+
+// TODO: Add more comprehensive tests once locale support is added.
+
+TEST(LlvmLibcWcscollTest, EmptyStringsShouldReturnZero) {
+ const wchar_t *s1 = L"";
+ const wchar_t *s2 = L"";
+
+ int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+ ASSERT_EQ(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, EmptyStringShouldNotEqualNonEmptyString) {
+ const wchar_t *empty = L"";
+ const wchar_t *s = L"abc";
+
+ // An empty string comes before a non empty one lexicographically, so lt 0
+ int result = LIBC_NAMESPACE::wcscoll(empty, s);
+ ASSERT_LT(result, 0);
+
+ // Check the reversed behaviour
+ result = LIBC_NAMESPACE::wcscoll(s, empty);
+ ASSERT_GT(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, EqualStringsShouldReturnZero) {
+ const wchar_t *s1 = L"abc";
+ const wchar_t *s2 = L"abc";
+
+ // Check if it returns 0 for two equal strings
+ int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+ ASSERT_EQ(result, 0);
+
+ // Verify for reversed operands
+ result = LIBC_NAMESPACE::wcscoll(s2, s1);
+ ASSERT_EQ(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, LexicographicalTest) {
+ const wchar_t *s1 = L"abc";
+ const wchar_t *s2 = L"def";
+
+ // Check if it returns lt 0 for (abc, def)
+ int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+ ASSERT_LT(result, 0);
+
+ // Check if it returns gt 0 for (def, abc)
+ result = LIBC_NAMESPACE::wcscoll(s2, s1);
+ ASSERT_GT(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, NonAsciiTest) {
+ const wchar_t *s1 = L"AbCdEf__1230!! \u1111";
+ const wchar_t *s2 = L"AbCdEf__1230!! \u1111\u2222";
+ // s1 is a proper prefix of s2, so s1 is expected to compare less than s2.
+ int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+ ASSERT_LT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s2, s1);
+ ASSERT_GT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s1, s1);
+ ASSERT_EQ(result, 0);
+
+ // The empty string is expected to compare less than a non-empty string.
+ const wchar_t *empty = L"";
+ result = LIBC_NAMESPACE::wcscoll(empty, s1);
+ ASSERT_LT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s1, empty);
+ ASSERT_GT(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, EightDigitUCNTest) {
+ const wchar_t *s1 = L"abC\U0001F44D"; // thumbs up emoji
+ const wchar_t *s2 = L"abC\U0001F44E"; // thumbs down emoji
+
+ int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+ ASSERT_LT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s2, s1);
+ ASSERT_GT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s1, s1);
+ ASSERT_EQ(result, 0);
+
+ // empty string
+ const wchar_t *empty = L"";
+ result = LIBC_NAMESPACE::wcscoll(empty, s1);
+ ASSERT_LT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s1, empty);
+ ASSERT_GT(result, 0);
+}
+
+TEST(LlvmLibcWcscollTest, AsciiVsNonAsciiTest) {
+ const wchar_t *s1 = L"a";
+ const wchar_t *s2 = L"\uFFFF";
+ const wchar_t *s3 = L"\U0001000F";
+
+ // ASCII vs. a 4 digit UCN (U+FFFF)
+ int result = LIBC_NAMESPACE::wcscoll(s1, s2);
+ ASSERT_LT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s2, s1);
+ ASSERT_GT(result, 0);
+
+ // ASCII vs. an 8 digit UCN (U+1000F)
+ result = LIBC_NAMESPACE::wcscoll(s1, s3);
+ ASSERT_LT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s3, s1);
+ ASSERT_GT(result, 0);
+
+ // 4 vs 8 digit UCN. NOTE(review): assumes U+1000F fits one wchar_t; confirm.
+ result = LIBC_NAMESPACE::wcscoll(s2, s3);
+ ASSERT_LT(result, 0);
+
+ result = LIBC_NAMESPACE::wcscoll(s3, s2);
+ ASSERT_GT(result, 0);
+}
+
+#if defined(LIBC_ADD_NULL_CHECKS)
+TEST(LlvmLibcWcscollTest, NULLCheck) {
+ // Passing in a nullptr should crash the program
+ EXPECT_DEATH([] { LIBC_NAMESPACE::wcscoll(L"", nullptr); }, WITH_SIGNAL(-1));
+ EXPECT_DEATH([] { LIBC_NAMESPACE::wcscoll(nullptr, L""); }, WITH_SIGNAL(-1));
+}
+#endif // LIBC_ADD_NULL_CHECKS
More information about the libc-commits
mailing list