[libcxx-commits] [libcxx] [libc++] Optimize ctype::to{lower, upper} (PR #145344)
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Jun 23 08:41:09 PDT 2025
https://github.com/philnik777 created https://github.com/llvm/llvm-project/pull/145344
```
----------------------------------------------
Benchmark old new
--------------------------- ------------------
BM_tolower_char<char> 1.64 ns 1.41 ns
BM_tolower_char<wchar_t> 1.64 ns 1.41 ns
BM_tolower_string<char> 32.4 ns 12.8 ns
BM_tolower_string<wchar_t> 32.9 ns 15.1 ns
BM_toupper_char<char> 1.63 ns 1.64 ns
BM_toupper_char<wchar_t> 1.63 ns 1.41 ns
BM_toupper_string<char> 32.2 ns 12.7 ns
BM_toupper_string<wchar_t> 33.0 ns 15.1 ns
```
>From cf0213a4f54f5bb0362925c7b154ed82b50e3dc2 Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser at berlin.de>
Date: Mon, 23 Jun 2025 17:40:34 +0200
Subject: [PATCH] [libc++] Optimize ctype::to{lower,upper}
---
libcxx/include/__config | 4 -
libcxx/include/__locale_dir/locale_base_api.h | 7 --
.../include/__locale_dir/support/bsd_like.h | 6 --
libcxx/include/__locale_dir/support/linux.h | 6 --
.../support/no_locale/characters.h | 6 --
libcxx/include/__locale_dir/support/windows.h | 6 --
libcxx/src/locale.cpp | 95 ++++---------------
libcxx/test/benchmarks/locale/ctype.bench.cpp | 69 ++++++++++++++
8 files changed, 90 insertions(+), 109 deletions(-)
create mode 100644 libcxx/test/benchmarks/locale/ctype.bench.cpp
diff --git a/libcxx/include/__config b/libcxx/include/__config
index af8a297fdf3fd..e5f94d31d8535 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -639,10 +639,6 @@ typedef __char32_t char32_t;
# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1
# endif
-# if defined(__APPLE__) || defined(__FreeBSD__)
-# define _LIBCPP_HAS_DEFAULTRUNELOCALE
-# endif
-
# if defined(__APPLE__) || defined(__FreeBSD__)
# define _LIBCPP_WCTYPE_IS_MASK
# endif
diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h
index bbc30b1cfe03f..8dbc28e839839 100644
--- a/libcxx/include/__locale_dir/locale_base_api.h
+++ b/libcxx/include/__locale_dir/locale_base_api.h
@@ -64,8 +64,6 @@
// Character manipulation functions
// --------------------------------
// namespace __locale {
-// int __islower(int, __locale_t);
-// int __isupper(int, __locale_t);
// int __isdigit(int, __locale_t); // required by the headers
// int __isxdigit(int, __locale_t); // required by the headers
// int __toupper(int, __locale_t);
@@ -208,11 +206,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
//
// Character manipulation functions
//
-# if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __ch, __locale_t __loc) { return islower_l(__ch, __loc); }
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __ch, __locale_t __loc) { return isupper_l(__ch, __loc); }
-# endif
-
inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __ch, __locale_t __loc) { return isdigit_l(__ch, __loc); }
inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __ch, __locale_t __loc) { return isxdigit_l(__ch, __loc); }
diff --git a/libcxx/include/__locale_dir/support/bsd_like.h b/libcxx/include/__locale_dir/support/bsd_like.h
index 54eb397358d7a..ac402924709e5 100644
--- a/libcxx/include/__locale_dir/support/bsd_like.h
+++ b/libcxx/include/__locale_dir/support/bsd_like.h
@@ -89,12 +89,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
//
// Character manipulation functions
//
-#if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return ::islower_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return ::isupper_l(__c, __loc); }
-#endif
-
inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return ::isdigit_l(__c, __loc); }
inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return ::isxdigit_l(__c, __loc); }
diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h
index fa0b03c646a2a..23bcf44c31dbf 100644
--- a/libcxx/include/__locale_dir/support/linux.h
+++ b/libcxx/include/__locale_dir/support/linux.h
@@ -116,12 +116,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
//
// Character manipulation functions
//
-#if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return islower_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return isupper_l(__c, __loc); }
-#endif
-
inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return isdigit_l(__c, __loc); }
inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return isxdigit_l(__c, __loc); }
diff --git a/libcxx/include/__locale_dir/support/no_locale/characters.h b/libcxx/include/__locale_dir/support/no_locale/characters.h
index 4fb48ed9ceac1..1281b8bd13094 100644
--- a/libcxx/include/__locale_dir/support/no_locale/characters.h
+++ b/libcxx/include/__locale_dir/support/no_locale/characters.h
@@ -29,12 +29,6 @@ namespace __locale {
//
// Character manipulation functions
//
-#if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t) { return std::islower(__c); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t) { return std::isupper(__c); }
-#endif
-
inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t) { return std::isdigit(__c); }
inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t) { return std::isxdigit(__c); }
diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h
index 0d3089c150081..0df8709f118d0 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -197,12 +197,6 @@ __strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) {
//
// Character manipulation functions
//
-#if defined(_LIBCPP_BUILDING_LIBRARY)
-inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return _islower_l(__c, __loc); }
-
-inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return _isupper_l(__c, __loc); }
-#endif
-
inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return _isdigit_l(__c, __loc); }
inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return _isxdigit_l(__c, __loc); }
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 30a7a54e1c016..a36297bc85843 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -726,48 +726,35 @@ const wchar_t* ctype<wchar_t>::do_scan_not(mask m, const char_type* low, const c
return low;
}
-wchar_t ctype<wchar_t>::do_toupper(char_type c) const {
-# ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
- return std::__libcpp_isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
-# elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
- return std::__libcpp_isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
-# else
- return (std::__libcpp_isascii(c) && __locale::__iswlower(c, _LIBCPP_GET_C_LOCALE)) ? c - L'a' + L'A' : c;
-# endif
+template <class CharT>
+static CharT to_upper_impl(CharT c) {
+ if (!std::__libcpp_isascii(c) || c < 'a' || c > 'z')
+ return c;
+ return c & ~0x20;
}
+template <class CharT>
+static CharT to_lower_impl(CharT c) {
+ if (!std::__libcpp_isascii(c) || c < 'A' || c > 'Z')
+ return c;
+ return c | 0x20;
+}
+
+wchar_t ctype<wchar_t>::do_toupper(char_type c) const { return to_upper_impl(c); }
+
const wchar_t* ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const {
for (; low != high; ++low)
-# ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
- *low = std::__libcpp_isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
-# elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
- *low = std::__libcpp_isascii(*low) ? ctype<char>::__classic_upper_table()[*low] : *low;
-# else
- *low =
- (std::__libcpp_isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? (*low - L'a' + L'A') : *low;
-# endif
+ *low = to_upper_impl(*low);
return low;
}
wchar_t ctype<wchar_t>::do_tolower(char_type c) const {
-# ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
- return std::__libcpp_isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
-# elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
- return std::__libcpp_isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
-# else
- return (std::__libcpp_isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - L'A' + 'a' : c;
-# endif
+ return to_lower_impl(c);
}
const wchar_t* ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const {
for (; low != high; ++low)
-# ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
- *low = std::__libcpp_isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
-# elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
- *low = std::__libcpp_isascii(*low) ? ctype<char>::__classic_lower_table()[*low] : *low;
-# else
- *low = (std::__libcpp_isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - L'A' + L'a' : *low;
-# endif
+ *low = to_lower_impl(*low);
return low;
}
@@ -811,59 +798,19 @@ ctype<char>::~ctype() {
delete[] __tab_;
}
-char ctype<char>::do_toupper(char_type c) const {
-#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
- return std::__libcpp_isascii(c) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
-#elif defined(__NetBSD__)
- return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
- return std::__libcpp_isascii(c) ? static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
-#else
- return (std::__libcpp_isascii(c) && __locale::__islower(c, _LIBCPP_GET_C_LOCALE)) ? c - 'a' + 'A' : c;
-#endif
-}
+char ctype<char>::do_toupper(char_type c) const { return to_upper_impl(c); }
const char* ctype<char>::do_toupper(char_type* low, const char_type* high) const {
for (; low != high; ++low)
-#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
- *low = std::__libcpp_isascii(*low)
- ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)])
- : *low;
-#elif defined(__NetBSD__)
- *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
- *low = std::__libcpp_isascii(*low) ? static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
-#else
- *low = (std::__libcpp_isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'a' + 'A' : *low;
-#endif
+ *low = to_upper_impl(*low);
return low;
}
-char ctype<char>::do_tolower(char_type c) const {
-#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
- return std::__libcpp_isascii(c) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
-#elif defined(__NetBSD__)
- return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
- return std::__libcpp_isascii(c) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
-#else
- return (std::__libcpp_isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - 'A' + 'a' : c;
-#endif
-}
+char ctype<char>::do_tolower(char_type c) const { return to_lower_impl(c); }
const char* ctype<char>::do_tolower(char_type* low, const char_type* high) const {
for (; low != high; ++low)
-#ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
- *low = std::__libcpp_isascii(*low)
- ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)])
- : *low;
-#elif defined(__NetBSD__)
- *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
- *low = std::__libcpp_isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
-#else
- *low = (std::__libcpp_isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'A' + 'a' : *low;
-#endif
+ *low = to_lower_impl(*low);
return low;
}
diff --git a/libcxx/test/benchmarks/locale/ctype.bench.cpp b/libcxx/test/benchmarks/locale/ctype.bench.cpp
new file mode 100644
index 0000000000000..bc81b778b680c
--- /dev/null
+++ b/libcxx/test/benchmarks/locale/ctype.bench.cpp
@@ -0,0 +1,69 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+#include <locale>
+
+#include <benchmark/benchmark.h>
+
+#include "make_string.h"
+
+template <class CharT>
+static void BM_tolower_char(benchmark::State& state) {
+ const auto& ct = std::use_facet<std::ctype<CharT>>(std::locale::classic());
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(ct.tolower(CharT('c')));
+ }
+}
+
+BENCHMARK(BM_tolower_char<char>);
+BENCHMARK(BM_tolower_char<wchar_t>);
+
+template <class CharT>
+static void BM_tolower_string(benchmark::State& state) {
+ const auto& ct = std::use_facet<std::ctype<CharT>>(std::locale::classic());
+ std::basic_string<CharT> str;
+
+ for (auto _ : state) {
+ str = MAKE_STRING_VIEW(CharT, "THIS IS A LONG STRING TO MAKE TO LOWER");
+ benchmark::DoNotOptimize(ct.tolower(str.data(), str.data() + str.size()));
+ }
+}
+
+BENCHMARK(BM_tolower_string<char>);
+BENCHMARK(BM_tolower_string<wchar_t>);
+
+template <class CharT>
+static void BM_toupper_char(benchmark::State& state) {
+ const auto& ct = std::use_facet<std::ctype<CharT>>(std::locale::classic());
+
+ for (auto _ : state) {
+ benchmark::DoNotOptimize(ct.toupper(CharT('c')));
+ }
+}
+
+BENCHMARK(BM_toupper_char<char>);
+BENCHMARK(BM_toupper_char<wchar_t>);
+
+template <class CharT>
+static void BM_toupper_string(benchmark::State& state) {
+ const auto& ct = std::use_facet<std::ctype<CharT>>(std::locale::classic());
+ std::basic_string<CharT> str;
+
+ for (auto _ : state) {
+ str = MAKE_STRING_VIEW(CharT, "THIS IS A LONG STRING TO MAKE TO LOWER");
+ benchmark::DoNotOptimize(ct.toupper(str.data(), str.data() + str.size()));
+ }
+}
+
+BENCHMARK(BM_toupper_string<char>);
+BENCHMARK(BM_toupper_string<wchar_t>);
+
+BENCHMARK_MAIN();
More information about the libcxx-commits
mailing list