[libc-commits] [libc] [libc] Use unsigned char in strcmp, strncmp, and strcoll comparisons (PR #185393)

via libc-commits libc-commits at lists.llvm.org
Mon Mar 9 03:50:59 PDT 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-libc

Author: Jeff Bailey (kaladron)

<details>
<summary>Changes</summary>

According to section 7.24.1 of the C standard, character comparison in string functions must be performed as if the characters had the type `unsigned char`.

The previous implementations of `strcmp`, `strncmp`, and `strcoll` were doing a direct subtraction of `char` values. On platforms where `char` is signed, this resulted in incorrect negative values being returned when characters exceeding 127 were being compared.

This patch fixes the comparison functions to explicitly cast the character values to `unsigned char` prior to computing their difference. It also adds regression tests to ensure the comparison behaves correctly for ASCII values greater than 127.

---
Full diff: https://github.com/llvm/llvm-project/pull/185393.diff


6 Files Affected:

- (modified) libc/src/string/strcmp.cpp (+3-1) 
- (modified) libc/src/string/strcoll.cpp (+1-1) 
- (modified) libc/src/string/strncmp.cpp (+4-1) 
- (modified) libc/test/src/string/strcmp_test.cpp (+7) 
- (modified) libc/test/src/string/strcoll_test.cpp (+7) 
- (modified) libc/test/src/string/strncmp_test.cpp (+7) 


``````````diff
diff --git a/libc/src/string/strcmp.cpp b/libc/src/string/strcmp.cpp
index f303ba5c09665..54a2f08b331ca 100644
--- a/libc/src/string/strcmp.cpp
+++ b/libc/src/string/strcmp.cpp
@@ -15,7 +15,9 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, strcmp, (const char *left, const char *right)) {
-  auto comp = [](char l, char r) -> int { return l - r; };
+  auto comp = [](char l, char r) -> int {
+    return static_cast<unsigned char>(l) - static_cast<unsigned char>(r);
+  };
   return inline_strcmp(left, right, comp);
 }
 
diff --git a/libc/src/string/strcoll.cpp b/libc/src/string/strcoll.cpp
index aa08f7194c9e5..7aa7cc2801ad8 100644
--- a/libc/src/string/strcoll.cpp
+++ b/libc/src/string/strcoll.cpp
@@ -20,7 +20,7 @@ LLVM_LIBC_FUNCTION(int, strcoll, (const char *left, const char *right)) {
   LIBC_CRASH_ON_NULLPTR(right);
   for (; *left && *left == *right; ++left, ++right)
     ;
-  return static_cast<int>(*left) - static_cast<int>(*right);
+  return static_cast<unsigned char>(*left) - static_cast<unsigned char>(*right);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/strncmp.cpp b/libc/src/string/strncmp.cpp
index f21fd769f3943..952f85d4da795 100644
--- a/libc/src/string/strncmp.cpp
+++ b/libc/src/string/strncmp.cpp
@@ -23,7 +23,10 @@ LLVM_LIBC_FUNCTION(int, strncmp,
     LIBC_CRASH_ON_NULLPTR(left);
     LIBC_CRASH_ON_NULLPTR(right);
   }
-  auto comp = [](char l, char r) -> int { return l - r; };
+
+  auto comp = [](char l, char r) -> int {
+    return static_cast<unsigned char>(l) - static_cast<unsigned char>(r);
+  };
   return inline_strncmp(left, right, n, comp);
 }
 
diff --git a/libc/test/src/string/strcmp_test.cpp b/libc/test/src/string/strcmp_test.cpp
index 234447610222f..8844d8463d416 100644
--- a/libc/test/src/string/strcmp_test.cpp
+++ b/libc/test/src/string/strcmp_test.cpp
@@ -106,3 +106,10 @@ TEST(LlvmLibcStrCmpTest, Case) {
   result = LIBC_NAMESPACE::strcmp(s2, s1);
   ASSERT_GT(result, 0);
 }
+
+TEST(LlvmLibcStrCmpTest, CharactersGreaterThan127ShouldBePositive) {
+  const char s1[] = {static_cast<char>(128), '\0'};
+  const char s2[] = {'\0'};
+  int result = LIBC_NAMESPACE::strcmp(s1, s2);
+  ASSERT_GT(result, 0);
+}
diff --git a/libc/test/src/string/strcoll_test.cpp b/libc/test/src/string/strcoll_test.cpp
index 1be7568aff6ee..f1e93afbf467f 100644
--- a/libc/test/src/string/strcoll_test.cpp
+++ b/libc/test/src/string/strcoll_test.cpp
@@ -38,3 +38,10 @@ TEST(LlvmLibcStrcollTest, CrashOnNullPtr) {
 }
 
 #endif // defined(LIBC_ADD_NULL_CHECKS)
+
+TEST(LlvmLibcStrcollTest, CharactersGreaterThan127ShouldBePositive) {
+  const char s1[] = {static_cast<char>(128), '\0'};
+  const char s2[] = {'\0'};
+  int result = LIBC_NAMESPACE::strcoll(s1, s2);
+  ASSERT_GT(result, 0);
+}
diff --git a/libc/test/src/string/strncmp_test.cpp b/libc/test/src/string/strncmp_test.cpp
index 1724a5436d1ea..2083e3d1b7425 100644
--- a/libc/test/src/string/strncmp_test.cpp
+++ b/libc/test/src/string/strncmp_test.cpp
@@ -167,3 +167,10 @@ TEST(LlvmLibcStrNCmpTest, Case) {
   result = LIBC_NAMESPACE::strncmp(s2, s1, 2);
   ASSERT_GT(result, 0);
 }
+
+TEST(LlvmLibcStrNCmpTest, CharactersGreaterThan127ShouldBePositive) {
+  const char s1[] = {static_cast<char>(128), '\0'};
+  const char s2[] = {'\0'};
+  int result = LIBC_NAMESPACE::strncmp(s1, s2, 1);
+  ASSERT_GT(result, 0);
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/185393


More information about the libc-commits mailing list