[libcxx-commits] [libcxx] [libc++][windows] Use _wsetlocale() in __locale_guard (PR #160479)
via libcxx-commits
libcxx-commits at lists.llvm.org
Fri Sep 26 02:46:27 PDT 2025
https://github.com/lb90 updated https://github.com/llvm/llvm-project/pull/160479
>From 231ab41289011017350cfc56f886ec3a1d776989 Mon Sep 17 00:00:00 2001
From: Luca Bacci <luca.bacci982 at gmail.com>
Date: Wed, 24 Sep 2025 11:14:25 +0200
Subject: [PATCH] [libc++][windows] Use _wsetlocale() in __locale_guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Querying the current locale string on Windows should always be done
with _wsetlocale(). The OS and the CRT support localized language
and country names, for example "Norwegian Bokmål_Norway".
Narrow setlocale() internally calls _wsetlocale() and converts the
returned wide string using the current LC_CTYPE charset. However,
the string may not be representable in the current LC_CTYPE charset.
Additionally, if the LC_CTYPE charset is changed after the query,
the returned string is no longer validly encoded and cannot be used
to restore the locale.
This is a problem for code that temporarily changes the thread locale
using RAII methods.
Fixes #160478
---
.../__locale_dir/locale_base_api/locale_guard.h | 16 +++++++++++++---
libcxx/include/__locale_dir/support/windows.h | 16 +++++++++++++---
2 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
index e3583634e4322..7065874ea6d32 100644
--- a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
+++ b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
@@ -45,7 +45,11 @@ struct __libcpp_locale_guard {
// each category. In the second case, we know at least one category won't
// be what we want, so we only have to check the first case.
if (std::strcmp(__l.__get_locale(), __lc) != 0) {
- __locale_all = _strdup(__lc);
+ // Use wsetlocale to query the current locale string. This avoids a lossy
+ // conversion of the locale string from UTF-16 to the current LC_CTYPE
+ // charset. The Windows CRT allows language / country strings outside of
+ // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8".
+ __locale_all = _wcsdup(__wsetlocale(nullptr));
if (__locale_all == nullptr)
__throw_bad_alloc();
__setlocale(__l.__get_locale());
@@ -57,7 +61,7 @@ struct __libcpp_locale_guard {
// for the different categories in the same format as returned by
// setlocale(LC_ALL, nullptr).
if (__locale_all != nullptr) {
- __setlocale(__locale_all);
+ __wsetlocale(__locale_all);
free(__locale_all);
}
_configthreadlocale(__status);
@@ -68,8 +72,14 @@ struct __libcpp_locale_guard {
__throw_bad_alloc();
return __new_locale;
}
+ static const wchar_t* __wsetlocale(const wchar_t* __locale) { // Wide-string helper: forwards __locale to _wsetlocale for LC_ALL.
+ const wchar_t* __new_locale = _wsetlocale(LC_ALL, __locale); // The guard passes nullptr here to query the current locale string.
+ if (__new_locale == nullptr) // A null result is treated as failure.
+ __throw_bad_alloc(); // Failure is reported as bad_alloc.
+ return __new_locale; // Returned pointer is not owned by the caller; the guard copies it with _wcsdup.
+ }
int __status;
- char* __locale_all = nullptr;
+ wchar_t* __locale_all = nullptr;
};
#endif
diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h
index 0df8709f118d0..39391ea1fd9c4 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -162,6 +162,12 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __loc
std::__throw_bad_alloc();
return __new_locale;
}
+inline _LIBCPP_HIDE_FROM_ABI wchar_t* __wsetlocale(int __category, const wchar_t* __locale) { // Wide-string helper: forwards both arguments to ::_wsetlocale.
+ wchar_t* __new_locale = ::_wsetlocale(__category, __locale); // __locale_guard passes nullptr here to query the current locale string.
+ if (__new_locale == nullptr) // A null result is treated as failure.
+ std::__throw_bad_alloc(); // Failure is reported as bad_alloc.
+ return __new_locale; // Returned pointer is not owned by the caller; __locale_guard copies it with _wcsdup.
+}
_LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc);
#endif // _LIBCPP_BUILDING_LIBRARY
@@ -309,7 +315,11 @@ struct __locale_guard {
// each category. In the second case, we know at least one category won't
// be what we want, so we only have to check the first case.
if (std::strcmp(__l.__get_locale(), __lc) != 0) {
- __locale_all = _strdup(__lc);
+ // Use wsetlocale to query the current locale string. This avoids a lossy
+ // conversion of the locale string from UTF-16 to the current LC_CTYPE
+ // charset. The Windows CRT allows language / country strings outside of
+ // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8".
+ __locale_all = _wcsdup(__locale::__wsetlocale(LC_ALL, nullptr));
if (__locale_all == nullptr)
std::__throw_bad_alloc();
__locale::__setlocale(LC_ALL, __l.__get_locale());
@@ -321,13 +331,13 @@ struct __locale_guard {
// for the different categories in the same format as returned by
// setlocale(LC_ALL, nullptr).
if (__locale_all != nullptr) {
- __locale::__setlocale(LC_ALL, __locale_all);
+ __locale::__wsetlocale(LC_ALL, __locale_all);
free(__locale_all);
}
_configthreadlocale(__status);
}
int __status;
- char* __locale_all = nullptr;
+ wchar_t* __locale_all = nullptr;
};
#endif // _LIBCPP_BUILDING_LIBRARY
More information about the libcxx-commits
mailing list