[libcxx-commits] [libcxx] [libc++][windows] Use _wsetlocale() in __locale_guard (PR #160479)

via libcxx-commits libcxx-commits at lists.llvm.org
Fri Sep 26 02:46:27 PDT 2025


https://github.com/lb90 updated https://github.com/llvm/llvm-project/pull/160479

>From 231ab41289011017350cfc56f886ec3a1d776989 Mon Sep 17 00:00:00 2001
From: Luca Bacci <luca.bacci982 at gmail.com>
Date: Wed, 24 Sep 2025 11:14:25 +0200
Subject: [PATCH] [libc++][windows] Use _wsetlocale() in __locale_guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Querying the current locale string on Windows should always be done
with _wsetlocale(). The OS and the CRT support localized language
and country names, which may contain non-ASCII characters, for
example "Norwegian Bokmål_Norway".

Narrow setlocale() internally calls _wsetlocale() and converts the
returned wide string using the current LC_CTYPE charset. However,
the string may not be representable in that charset. Additionally,
if the LC_CTYPE charset is changed after the query, the returned
string is no longer validly encoded and cannot be used to restore
the locale.

This is a problem for code that temporarily changes the thread locale
using RAII guards, which save the current locale string and later use
the saved string to restore the previous locale.

Fixes #160478
---
 .../__locale_dir/locale_base_api/locale_guard.h  | 16 +++++++++++++---
 libcxx/include/__locale_dir/support/windows.h    | 16 +++++++++++++---
 2 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
index e3583634e4322..7065874ea6d32 100644
--- a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
+++ b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
@@ -45,7 +45,11 @@ struct __libcpp_locale_guard {
     // each category.  In the second case, we know at least one category won't
     // be what we want, so we only have to check the first case.
     if (std::strcmp(__l.__get_locale(), __lc) != 0) {
-      __locale_all = _strdup(__lc);
+      // Use wsetlocale to query the current locale string. This avoids a lossy
+      // conversion of the locale string from UTF-16 to the current LC_CTYPE
+      // charset. The Windows CRT allows language / country strings outside of
+      // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8".
+      __locale_all = _wcsdup(__wsetlocale(nullptr));
       if (__locale_all == nullptr)
         __throw_bad_alloc();
       __setlocale(__l.__get_locale());
@@ -57,7 +61,7 @@ struct __libcpp_locale_guard {
     // for the different categories in the same format as returned by
     // setlocale(LC_ALL, nullptr).
     if (__locale_all != nullptr) {
-      __setlocale(__locale_all);
+      __wsetlocale(__locale_all);
       free(__locale_all);
     }
     _configthreadlocale(__status);
@@ -68,8 +72,14 @@ struct __libcpp_locale_guard {
       __throw_bad_alloc();
     return __new_locale;
   }
+  static const wchar_t* __wsetlocale(const wchar_t* __locale) {
+    const wchar_t* __new_locale = _wsetlocale(LC_ALL, __locale);
+    if (__new_locale == nullptr)
+      __throw_bad_alloc();
+    return __new_locale;
+  }
   int __status;
-  char* __locale_all = nullptr;
+  wchar_t* __locale_all = nullptr;
 };
 #endif
 
diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h
index 0df8709f118d0..39391ea1fd9c4 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -162,6 +162,12 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __loc
     std::__throw_bad_alloc();
   return __new_locale;
 }
+inline _LIBCPP_HIDE_FROM_ABI wchar_t* __wsetlocale(int __category, const wchar_t* __locale) {
+  wchar_t* __new_locale = ::_wsetlocale(__category, __locale);
+  if (__new_locale == nullptr)
+    std::__throw_bad_alloc();
+  return __new_locale;
+}
 _LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc);
 #endif // _LIBCPP_BUILDING_LIBRARY
 
@@ -309,7 +315,11 @@ struct __locale_guard {
     // each category.  In the second case, we know at least one category won't
     // be what we want, so we only have to check the first case.
     if (std::strcmp(__l.__get_locale(), __lc) != 0) {
-      __locale_all = _strdup(__lc);
+      // Use wsetlocale to query the current locale string. This avoids a lossy
+      // conversion of the locale string from UTF-16 to the current LC_CTYPE
+      // charset. The Windows CRT allows language / country strings outside of
+      // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8".
+      __locale_all = _wcsdup(__locale::__wsetlocale(LC_ALL, nullptr));
       if (__locale_all == nullptr)
         std::__throw_bad_alloc();
       __locale::__setlocale(LC_ALL, __l.__get_locale());
@@ -321,13 +331,13 @@ struct __locale_guard {
     // for the different categories in the same format as returned by
     // setlocale(LC_ALL, nullptr).
     if (__locale_all != nullptr) {
-      __locale::__setlocale(LC_ALL, __locale_all);
+      __locale::__wsetlocale(LC_ALL, __locale_all);
       free(__locale_all);
     }
     _configthreadlocale(__status);
   }
   int __status;
-  char* __locale_all = nullptr;
+  wchar_t* __locale_all = nullptr;
 };
 #endif // _LIBCPP_BUILDING_LIBRARY
 



More information about the libcxx-commits mailing list