[libc-commits] [libc] [libc] Use unsigned char in strcmp, strncmp, and strcoll comparisons (PR #185393)

Jeff Bailey via libc-commits libc-commits at lists.llvm.org
Mon Mar 9 03:50:36 PDT 2026


https://github.com/kaladron updated https://github.com/llvm/llvm-project/pull/185393

>From 3cdffea707644a1da429a4c00913efe7f496f2eb Mon Sep 17 00:00:00 2001
From: Jeff Bailey <jeffbailey at google.com>
Date: Mon, 9 Mar 2026 10:42:06 +0000
Subject: [PATCH] [libc] Fix character comparison to use unsigned char in
 strcmp, strncmp, and strcoll

According to section 7.24.1 of the C standard, character comparison in string
functions must be performed as if the characters had the type `unsigned char`.

The previous implementations of `strcmp`, `strncmp`, and `strcoll` were doing
a direct subtraction of `char` values. On platforms where `char` is signed,
this resulted in incorrect negative values being returned when characters
exceeding 127 were being compared.

This patch fixes the comparison functions to explicitly cast the character
values to `unsigned char` prior to computing their difference. It also adds
regression tests to ensure the comparison behaves correctly for ASCII values
greater than 127.
---
 libc/src/string/strcmp.cpp            | 4 +++-
 libc/src/string/strcoll.cpp           | 2 +-
 libc/src/string/strncmp.cpp           | 5 ++++-
 libc/test/src/string/strcmp_test.cpp  | 7 +++++++
 libc/test/src/string/strcoll_test.cpp | 7 +++++++
 libc/test/src/string/strncmp_test.cpp | 7 +++++++
 6 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/libc/src/string/strcmp.cpp b/libc/src/string/strcmp.cpp
index f303ba5c09665..54a2f08b331ca 100644
--- a/libc/src/string/strcmp.cpp
+++ b/libc/src/string/strcmp.cpp
@@ -15,7 +15,9 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, strcmp, (const char *left, const char *right)) {
-  auto comp = [](char l, char r) -> int { return l - r; };
+  auto comp = [](char l, char r) -> int {
+    return static_cast<unsigned char>(l) - static_cast<unsigned char>(r);
+  };
   return inline_strcmp(left, right, comp);
 }
 
diff --git a/libc/src/string/strcoll.cpp b/libc/src/string/strcoll.cpp
index aa08f7194c9e5..7aa7cc2801ad8 100644
--- a/libc/src/string/strcoll.cpp
+++ b/libc/src/string/strcoll.cpp
@@ -20,7 +20,7 @@ LLVM_LIBC_FUNCTION(int, strcoll, (const char *left, const char *right)) {
   LIBC_CRASH_ON_NULLPTR(right);
   for (; *left && *left == *right; ++left, ++right)
     ;
-  return static_cast<int>(*left) - static_cast<int>(*right);
+  return static_cast<unsigned char>(*left) - static_cast<unsigned char>(*right);
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/string/strncmp.cpp b/libc/src/string/strncmp.cpp
index f21fd769f3943..952f85d4da795 100644
--- a/libc/src/string/strncmp.cpp
+++ b/libc/src/string/strncmp.cpp
@@ -23,7 +23,10 @@ LLVM_LIBC_FUNCTION(int, strncmp,
     LIBC_CRASH_ON_NULLPTR(left);
     LIBC_CRASH_ON_NULLPTR(right);
   }
-  auto comp = [](char l, char r) -> int { return l - r; };
+
+  auto comp = [](char l, char r) -> int {
+    return static_cast<unsigned char>(l) - static_cast<unsigned char>(r);
+  };
   return inline_strncmp(left, right, n, comp);
 }
 
diff --git a/libc/test/src/string/strcmp_test.cpp b/libc/test/src/string/strcmp_test.cpp
index 234447610222f..8844d8463d416 100644
--- a/libc/test/src/string/strcmp_test.cpp
+++ b/libc/test/src/string/strcmp_test.cpp
@@ -106,3 +106,10 @@ TEST(LlvmLibcStrCmpTest, Case) {
   result = LIBC_NAMESPACE::strcmp(s2, s1);
   ASSERT_GT(result, 0);
 }
+
+TEST(LlvmLibcStrCmpTest, CharactersGreaterThan127ShouldBePositive) {
+  const char s1[] = {static_cast<char>(128), '\0'};
+  const char s2[] = {'\0'};
+  int result = LIBC_NAMESPACE::strcmp(s1, s2);
+  ASSERT_GT(result, 0);
+}
diff --git a/libc/test/src/string/strcoll_test.cpp b/libc/test/src/string/strcoll_test.cpp
index 1be7568aff6ee..f1e93afbf467f 100644
--- a/libc/test/src/string/strcoll_test.cpp
+++ b/libc/test/src/string/strcoll_test.cpp
@@ -38,3 +38,10 @@ TEST(LlvmLibcStrcollTest, CrashOnNullPtr) {
 }
 
 #endif // defined(LIBC_ADD_NULL_CHECKS)
+
+TEST(LlvmLibcStrcollTest, CharactersGreaterThan127ShouldBePositive) {
+  const char s1[] = {static_cast<char>(128), '\0'};
+  const char s2[] = {'\0'};
+  int result = LIBC_NAMESPACE::strcoll(s1, s2);
+  ASSERT_GT(result, 0);
+}
diff --git a/libc/test/src/string/strncmp_test.cpp b/libc/test/src/string/strncmp_test.cpp
index 1724a5436d1ea..2083e3d1b7425 100644
--- a/libc/test/src/string/strncmp_test.cpp
+++ b/libc/test/src/string/strncmp_test.cpp
@@ -167,3 +167,10 @@ TEST(LlvmLibcStrNCmpTest, Case) {
   result = LIBC_NAMESPACE::strncmp(s2, s1, 2);
   ASSERT_GT(result, 0);
 }
+
+TEST(LlvmLibcStrNCmpTest, CharactersGreaterThan127ShouldBePositive) {
+  const char s1[] = {static_cast<char>(128), '\0'};
+  const char s2[] = {'\0'};
+  int result = LIBC_NAMESPACE::strncmp(s1, s2, 1);
+  ASSERT_GT(result, 0);
+}



More information about the libc-commits mailing list