[libc-commits] [libc] [libc] Use unsigned char in strcmp, strncmp, and strcoll comparisons (PR #185393)
via libc-commits
libc-commits at lists.llvm.org
Mon Mar 9 03:50:59 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: Jeff Bailey (kaladron)
<details>
<summary>Changes</summary>
According to section 7.24.1 of the C standard, character comparison in string functions must be performed as if the characters had the type `unsigned char`.
The previous implementations of `strcmp`, `strncmp`, and `strcoll` were doing a direct subtraction of `char` values. On platforms where `char` is signed, this resulted in incorrect negative values being returned when characters exceeding 127 were being compared.
This patch fixes the comparison functions to explicitly cast the character values to `unsigned char` prior to computing their difference. It also adds regression tests to ensure the comparison behaves correctly for ASCII values greater than 127.
---
Full diff: https://github.com/llvm/llvm-project/pull/185393.diff
6 Files Affected:
- (modified) libc/src/string/strcmp.cpp (+3-1)
- (modified) libc/src/string/strcoll.cpp (+1-1)
- (modified) libc/src/string/strncmp.cpp (+4-1)
- (modified) libc/test/src/string/strcmp_test.cpp (+7)
- (modified) libc/test/src/string/strcoll_test.cpp (+7)
- (modified) libc/test/src/string/strncmp_test.cpp (+7)
``````````diff
diff --git a/libc/src/string/strcmp.cpp b/libc/src/string/strcmp.cpp
index f303ba5c09665..54a2f08b331ca 100644
--- a/libc/src/string/strcmp.cpp
+++ b/libc/src/string/strcmp.cpp
@@ -15,7 +15,9 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, strcmp, (const char *left, const char *right)) {
- auto comp = [](char l, char r) -> int { return l - r; };
+ auto comp = [](char l, char r) -> int {
+ return static_cast<unsigned char>(l) - static_cast<unsigned char>(r);
+ };
return inline_strcmp(left, right, comp);
}
diff --git a/libc/src/string/strcoll.cpp b/libc/src/string/strcoll.cpp
index aa08f7194c9e5..7aa7cc2801ad8 100644
--- a/libc/src/string/strcoll.cpp
+++ b/libc/src/string/strcoll.cpp
@@ -20,7 +20,7 @@ LLVM_LIBC_FUNCTION(int, strcoll, (const char *left, const char *right)) {
LIBC_CRASH_ON_NULLPTR(right);
for (; *left && *left == *right; ++left, ++right)
;
- return static_cast<int>(*left) - static_cast<int>(*right);
+ return static_cast<unsigned char>(*left) - static_cast<unsigned char>(*right);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/strncmp.cpp b/libc/src/string/strncmp.cpp
index f21fd769f3943..952f85d4da795 100644
--- a/libc/src/string/strncmp.cpp
+++ b/libc/src/string/strncmp.cpp
@@ -23,7 +23,10 @@ LLVM_LIBC_FUNCTION(int, strncmp,
LIBC_CRASH_ON_NULLPTR(left);
LIBC_CRASH_ON_NULLPTR(right);
}
- auto comp = [](char l, char r) -> int { return l - r; };
+
+ auto comp = [](char l, char r) -> int {
+ return static_cast<unsigned char>(l) - static_cast<unsigned char>(r);
+ };
return inline_strncmp(left, right, n, comp);
}
diff --git a/libc/test/src/string/strcmp_test.cpp b/libc/test/src/string/strcmp_test.cpp
index 234447610222f..8844d8463d416 100644
--- a/libc/test/src/string/strcmp_test.cpp
+++ b/libc/test/src/string/strcmp_test.cpp
@@ -106,3 +106,10 @@ TEST(LlvmLibcStrCmpTest, Case) {
result = LIBC_NAMESPACE::strcmp(s2, s1);
ASSERT_GT(result, 0);
}
+
+TEST(LlvmLibcStrCmpTest, CharactersGreaterThan127ShouldBePositive) {
+ const char s1[] = {static_cast<char>(128), '\0'};
+ const char s2[] = {'\0'};
+ int result = LIBC_NAMESPACE::strcmp(s1, s2);
+ ASSERT_GT(result, 0);
+}
diff --git a/libc/test/src/string/strcoll_test.cpp b/libc/test/src/string/strcoll_test.cpp
index 1be7568aff6ee..f1e93afbf467f 100644
--- a/libc/test/src/string/strcoll_test.cpp
+++ b/libc/test/src/string/strcoll_test.cpp
@@ -38,3 +38,10 @@ TEST(LlvmLibcStrcollTest, CrashOnNullPtr) {
}
#endif // defined(LIBC_ADD_NULL_CHECKS)
+
+TEST(LlvmLibcStrcollTest, CharactersGreaterThan127ShouldBePositive) {
+ const char s1[] = {static_cast<char>(128), '\0'};
+ const char s2[] = {'\0'};
+ int result = LIBC_NAMESPACE::strcoll(s1, s2);
+ ASSERT_GT(result, 0);
+}
diff --git a/libc/test/src/string/strncmp_test.cpp b/libc/test/src/string/strncmp_test.cpp
index 1724a5436d1ea..2083e3d1b7425 100644
--- a/libc/test/src/string/strncmp_test.cpp
+++ b/libc/test/src/string/strncmp_test.cpp
@@ -167,3 +167,10 @@ TEST(LlvmLibcStrNCmpTest, Case) {
result = LIBC_NAMESPACE::strncmp(s2, s1, 2);
ASSERT_GT(result, 0);
}
+
+TEST(LlvmLibcStrNCmpTest, CharactersGreaterThan127ShouldBePositive) {
+ const char s1[] = {static_cast<char>(128), '\0'};
+ const char s2[] = {'\0'};
+ int result = LIBC_NAMESPACE::strncmp(s1, s2, 1);
+ ASSERT_GT(result, 0);
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/185393
More information about the libc-commits
mailing list