[libcxx-commits] [libcxx] [libc++] Optimize ctype::to{lower, upper} (PR #145344)

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Mon Jun 23 08:41:09 PDT 2025


https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/145344

```
----------------------------------------------
Benchmark                       old        new
--------------------------- ------------------
BM_tolower_char<char>       1.64 ns    1.41 ns
BM_tolower_char<wchar_t>    1.64 ns    1.41 ns
BM_tolower_string<char>     32.4 ns    12.8 ns
BM_tolower_string<wchar_t>  32.9 ns    15.1 ns
BM_toupper_char<char>       1.63 ns    1.64 ns
BM_toupper_char<wchar_t>    1.63 ns    1.41 ns
BM_toupper_string<char>     32.2 ns    12.7 ns
BM_toupper_string<wchar_t>  33.0 ns    15.1 ns
```


From cf0213a4f54f5bb0362925c7b154ed82b50e3dc2 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Mon, 23 Jun 2025 17:40:34 +0200
Subject: [PATCH] [libc++] Optimize ctype::to{lower,upper}

---
 libcxx/include/__config                       |  4 -
 libcxx/include/__locale_dir/locale_base_api.h |  7 --
 .../include/__locale_dir/support/bsd_like.h   |  6 --
 libcxx/include/__locale_dir/support/linux.h   |  6 --
 .../support/no_locale/characters.h            |  6 --
 libcxx/include/__locale_dir/support/windows.h |  6 --
 libcxx/src/locale.cpp                         | 95 ++++---------------
 libcxx/test/benchmarks/locale/ctype.bench.cpp | 69 ++++++++++++++
 8 files changed, 90 insertions(+), 109 deletions(-)
 create mode 100644 libcxx/test/benchmarks/locale/ctype.bench.cpp

diff --git a/libcxx/include/__config b/libcxx/include/__config
index af8a297fdf3fd..e5f94d31d8535 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -639,10 +639,6 @@ typedef __char32_t char32_t;
 #    define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1
 #  endif
 
-#  if defined(__APPLE__) || defined(__FreeBSD__)
-#    define _LIBCPP_HAS_DEFAULTRUNELOCALE
-#  endif
-
 #  if defined(__APPLE__) || defined(__FreeBSD__)
 #    define _LIBCPP_WCTYPE_IS_MASK
 #  endif
diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h
index bbc30b1cfe03f..8dbc28e839839 100644
--- a/libcxx/include/__locale_dir/locale_base_api.h
+++ b/libcxx/include/__locale_dir/locale_base_api.h
@@ -64,8 +64,6 @@
 // Character manipulation functions
 // --------------------------------
 // namespace __locale {
-//  int     __islower(int, __locale_t);
-//  int     __isupper(int, __locale_t);
 //  int     __isdigit(int, __locale_t);  // required by the headers
 //  int     __isxdigit(int, __locale_t); // required by the headers
 //  int     __toupper(int, __locale_t);
@@ -208,11 +206,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
 //
 // Character manipulation functions
 //
-#    if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __ch, __locale_t __loc) { return islower_l(__ch, __loc); }
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __ch, __locale_t __loc) { return isupper_l(__ch, __loc); }
-#    endif
-
 inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __ch, __locale_t __loc) { return isdigit_l(__ch, __loc); }
 inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __ch, __locale_t __loc) { return isxdigit_l(__ch, __loc); }
 
diff --git a/libcxx/include/__locale_dir/support/bsd_like.h b/libcxx/include/__locale_dir/support/bsd_like.h
index 54eb397358d7a..ac402924709e5 100644
--- a/libcxx/include/__locale_dir/support/bsd_like.h
+++ b/libcxx/include/__locale_dir/support/bsd_like.h
@@ -89,12 +89,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
 //
 // Character manipulation functions
 //
-#if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return ::islower_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return ::isupper_l(__c, __loc); }
-#endif
-
 inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return ::isdigit_l(__c, __loc); }
 
 inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return ::isxdigit_l(__c, __loc); }
diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h
index fa0b03c646a2a..23bcf44c31dbf 100644
--- a/libcxx/include/__locale_dir/support/linux.h
+++ b/libcxx/include/__locale_dir/support/linux.h
@@ -116,12 +116,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
 //
 // Character manipulation functions
 //
-#if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return islower_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return isupper_l(__c, __loc); }
-#endif
-
 inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return isdigit_l(__c, __loc); }
 
 inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return isxdigit_l(__c, __loc); }
diff --git a/libcxx/include/__locale_dir/support/no_locale/characters.h b/libcxx/include/__locale_dir/support/no_locale/characters.h
index 4fb48ed9ceac1..1281b8bd13094 100644
--- a/libcxx/include/__locale_dir/support/no_locale/characters.h
+++ b/libcxx/include/__locale_dir/support/no_locale/characters.h
@@ -29,12 +29,6 @@ namespace __locale {
 //
 // Character manipulation functions
 //
-#if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t) { return std::islower(__c); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t) { return std::isupper(__c); }
-#endif
-
 inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t) { return std::isdigit(__c); }
 
 inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t) { return std::isxdigit(__c); }
diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h
index 0d3089c150081..0df8709f118d0 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -197,12 +197,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
 //
 // Character manipulation functions
 //
-#if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return _islower_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return _isupper_l(__c, __loc); }
-#endif
-
 inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return _isdigit_l(__c, __loc); }
 
 inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return _isxdigit_l(__c, __loc); }
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 30a7a54e1c016..a36297bc85843 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -726,48 +726,35 @@ const wchar_t* ctype<wchar_t>::do_scan_not(mask m, const char_type* low, const c
   return low;
 }
 
-wchar_t ctype<wchar_t>::do_toupper(char_type c) const {
-#  ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-  return std::__libcpp_isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
-#  elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
-  return std::__libcpp_isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
-#  else
-  return (std::__libcpp_isascii(c) && __locale::__iswlower(c, _LIBCPP_GET_C_LOCALE)) ? c - L'a' + L'A' : c;
-#  endif
+template <class CharT>
+static CharT to_upper_impl(CharT c) {
+  // Only ASCII 'a'..'z' is mapped; clearing bit 0x20 turns such a letter into its uppercase form.
+  const bool is_ascii_lower = std::__libcpp_isascii(c) && c >= 'a' && c <= 'z';
+  return is_ascii_lower ? static_cast<CharT>(c & ~0x20) : c;
 }
 
+template <class CharT>
+static CharT to_lower_impl(CharT c) {
+  // Only ASCII 'A'..'Z' is mapped; setting bit 0x20 turns such a letter into its lowercase form.
+  const bool is_ascii_upper = std::__libcpp_isascii(c) && c >= 'A' && c <= 'Z';
+  return is_ascii_upper ? static_cast<CharT>(c | 0x20) : c;
+}
+
+wchar_t ctype<wchar_t>::do_toupper(char_type c) const { return to_upper_impl(c); }
+
 const wchar_t* ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const {
   for (; low != high; ++low)
-#  ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-    *low = std::__libcpp_isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
-#  elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
-    *low = std::__libcpp_isascii(*low) ? ctype<char>::__classic_upper_table()[*low] : *low;
-#  else
-    *low =
-        (std::__libcpp_isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? (*low - L'a' + L'A') : *low;
-#  endif
+    *low = to_upper_impl(*low);
   return low;
 }
 
 wchar_t ctype<wchar_t>::do_tolower(char_type c) const {
-#  ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-  return std::__libcpp_isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
-#  elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
-  return std::__libcpp_isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
-#  else
-  return (std::__libcpp_isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - L'A' + 'a' : c;
-#  endif
+  return to_lower_impl(c);
 }
 
 const wchar_t* ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const {
   for (; low != high; ++low)
-#  ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-    *low = std::__libcpp_isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
-#  elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
-    *low = std::__libcpp_isascii(*low) ? ctype<char>::__classic_lower_table()[*low] : *low;
-#  else
-    *low = (std::__libcpp_isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - L'A' + L'a' : *low;
-#  endif
+    *low = to_lower_impl(*low);
   return low;
 }
 
@@ -811,59 +798,19 @@ ctype<char>::~ctype() {
     delete[] __tab_;
 }
 
-char ctype<char>::do_toupper(char_type c) const {
-#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-  return std::__libcpp_isascii(c) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
-#elif defined(__NetBSD__)
-  return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
-  return std::__libcpp_isascii(c) ? static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
-#else
-  return (std::__libcpp_isascii(c) && __locale::__islower(c, _LIBCPP_GET_C_LOCALE)) ? c - 'a' + 'A' : c;
-#endif
-}
+char ctype<char>::do_toupper(char_type c) const { return to_upper_impl(c); }
 
 const char* ctype<char>::do_toupper(char_type* low, const char_type* high) const {
   for (; low != high; ++low)
-#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-    *low = std::__libcpp_isascii(*low)
-             ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)])
-             : *low;
-#elif defined(__NetBSD__)
-    *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
-    *low = std::__libcpp_isascii(*low) ? static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
-#else
-    *low = (std::__libcpp_isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'a' + 'A' : *low;
-#endif
+    *low = to_upper_impl(*low);
   return low;
 }
 
-char ctype<char>::do_tolower(char_type c) const {
-#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-  return std::__libcpp_isascii(c) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
-#elif defined(__NetBSD__)
-  return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
-  return std::__libcpp_isascii(c) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
-#else
-  return (std::__libcpp_isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - 'A' + 'a' : c;
-#endif
-}
+char ctype<char>::do_tolower(char_type c) const { return to_lower_impl(c); }
 
 const char* ctype<char>::do_tolower(char_type* low, const char_type* high) const {
   for (; low != high; ++low)
-#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-    *low = std::__libcpp_isascii(*low)
-             ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)])
-             : *low;
-#elif defined(__NetBSD__)
-    *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
-    *low = std::__libcpp_isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
-#else
-    *low = (std::__libcpp_isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'A' + 'a' : *low;
-#endif
+    *low = to_lower_impl(*low);
   return low;
 }
 
diff --git a/libcxx/test/benchmarks/locale/ctype.bench.cpp b/libcxx/test/benchmarks/locale/ctype.bench.cpp
new file mode 100644
index 0000000000000..bc81b778b680c
--- /dev/null
+++ b/libcxx/test/benchmarks/locale/ctype.bench.cpp
@@ -0,0 +1,69 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+#include <locale>
+
+#include <benchmark/benchmark.h>
+
+#include "make_string.h"
+
+template <class CharT>
+static void BM_tolower_char(benchmark::State& state) {
+  const auto& ct = std::use_facet<std::ctype<CharT>>(std::locale::classic());
+  // Feed an uppercase letter so tolower() has a real conversion to perform.
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(ct.tolower(CharT('C')));
+  }
+}
+
+BENCHMARK(BM_tolower_char<char>);
+BENCHMARK(BM_tolower_char<wchar_t>);
+
+template <class CharT>
+static void BM_tolower_string(benchmark::State& state) {
+  const std::ctype<CharT>& facet = std::use_facet<std::ctype<CharT>>(std::locale::classic());
+  std::basic_string<CharT> text;
+  // Reassign the uppercase input on every iteration before converting the buffer.
+  for (auto _ : state) {
+    text = MAKE_STRING_VIEW(CharT, "THIS IS A LONG STRING TO MAKE TO LOWER");
+    benchmark::DoNotOptimize(facet.tolower(text.data(), text.data() + text.size()));
+  }
+}
+
+BENCHMARK(BM_tolower_string<char>);
+BENCHMARK(BM_tolower_string<wchar_t>);
+
+template <class CharT>
+static void BM_toupper_char(benchmark::State& state) {
+  const std::ctype<CharT>& facet = std::use_facet<std::ctype<CharT>>(std::locale::classic());
+  // 'c' is a lowercase letter, so toupper() has a real conversion to perform.
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(facet.toupper(CharT('c')));
+  }
+}
+
+BENCHMARK(BM_toupper_char<char>);
+BENCHMARK(BM_toupper_char<wchar_t>);
+
+template <class CharT>
+static void BM_toupper_string(benchmark::State& state) {
+  const auto& ct = std::use_facet<std::ctype<CharT>>(std::locale::classic());
+  std::basic_string<CharT> str;
+  // Use lowercase input so that toupper() has letters to convert.
+  for (auto _ : state) {
+    str = MAKE_STRING_VIEW(CharT, "this is a long string to make to upper");
+    benchmark::DoNotOptimize(ct.toupper(str.data(), str.data() + str.size()));
+  }
+}
+
+BENCHMARK(BM_toupper_string<char>);
+BENCHMARK(BM_toupper_string<wchar_t>);
+
+BENCHMARK_MAIN();



More information about the libcxx-commits mailing list