[libcxx-commits] [libcxx] [libc++][windows] Use _wsetlocale() in __locale_guard (PR #160479)
via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Oct 11 07:23:54 PDT 2025
https://github.com/lb90 updated https://github.com/llvm/llvm-project/pull/160479
>From 803b2311824c6dbbe377a3b476cf759d4774e846 Mon Sep 17 00:00:00 2001
From: Luca Bacci <luca.bacci982 at gmail.com>
Date: Wed, 24 Sep 2025 11:14:25 +0200
Subject: [PATCH] [libc++][windows] Use _wsetlocale() in __locale_guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Querying the current locale string on Windows should always be done
with _wsetlocale(). The OS and the CRT support localized language
and country names, which may contain non-ASCII characters, for
example "Norwegian Bokmål_Norway".
Narrow setlocale() internally calls _wsetlocale() and converts the
returned wide string using the current LC_CTYPE charset. However
the string may not be representable in the current LC_CTYPE charset.
Additionally, if the LC_CTYPE charset is changed after the query,
the returned string is no longer validly encoded and cannot be used
to restore the locale.
This is a problem for code that temporarily changes the thread locale
using RAII guards such as __locale_guard.
Fixes #160478
---
.../locale_base_api/locale_guard.h | 16 ++++++--
libcxx/include/__locale_dir/support/windows.h | 16 ++++++--
.../windows.non_ascii_locale_names.pass.cpp | 39 +++++++++++++++++++
3 files changed, 65 insertions(+), 6 deletions(-)
create mode 100644 libcxx/test/libcxx/localization/locales/windows.non_ascii_locale_names.pass.cpp
diff --git a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
index e3583634e4322..7065874ea6d32 100644
--- a/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
+++ b/libcxx/include/__cxx03/__locale_dir/locale_base_api/locale_guard.h
@@ -45,7 +45,11 @@ struct __libcpp_locale_guard {
// each category. In the second case, we know at least one category won't
// be what we want, so we only have to check the first case.
if (std::strcmp(__l.__get_locale(), __lc) != 0) {
- __locale_all = _strdup(__lc);
+ // Use wsetlocale to query the current locale string. This avoids a lossy
+ // conversion of the locale string from UTF-16 to the current LC_CTYPE
+ // charset. The Windows CRT allows language / country strings outside of
+ // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8".
+ __locale_all = _wcsdup(__wsetlocale(nullptr));
if (__locale_all == nullptr)
__throw_bad_alloc();
__setlocale(__l.__get_locale());
@@ -57,7 +61,7 @@ struct __libcpp_locale_guard {
// for the different categories in the same format as returned by
// setlocale(LC_ALL, nullptr).
if (__locale_all != nullptr) {
- __setlocale(__locale_all);
+ __wsetlocale(__locale_all);
free(__locale_all);
}
_configthreadlocale(__status);
@@ -68,8 +72,14 @@ struct __libcpp_locale_guard {
__throw_bad_alloc();
return __new_locale;
}
+ static const wchar_t* __wsetlocale(const wchar_t* __locale) {
+ const wchar_t* __new_locale = _wsetlocale(LC_ALL, __locale);
+ if (__new_locale == nullptr)
+ __throw_bad_alloc();
+ return __new_locale;
+ }
int __status;
- char* __locale_all = nullptr;
+ wchar_t* __locale_all = nullptr;
};
#endif
diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h
index 0df8709f118d0..39391ea1fd9c4 100644
--- a/libcxx/include/__locale_dir/support/windows.h
+++ b/libcxx/include/__locale_dir/support/windows.h
@@ -162,6 +162,12 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __loc
std::__throw_bad_alloc();
return __new_locale;
}
+inline _LIBCPP_HIDE_FROM_ABI wchar_t* __wsetlocale(int __category, const wchar_t* __locale) {
+ wchar_t* __new_locale = ::_wsetlocale(__category, __locale);
+ if (__new_locale == nullptr)
+ std::__throw_bad_alloc();
+ return __new_locale;
+}
_LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc);
#endif // _LIBCPP_BUILDING_LIBRARY
@@ -309,7 +315,11 @@ struct __locale_guard {
// each category. In the second case, we know at least one category won't
// be what we want, so we only have to check the first case.
if (std::strcmp(__l.__get_locale(), __lc) != 0) {
- __locale_all = _strdup(__lc);
+ // Use wsetlocale to query the current locale string. This avoids a lossy
+ // conversion of the locale string from UTF-16 to the current LC_CTYPE
+ // charset. The Windows CRT allows language / country strings outside of
+ // ASCII, e.g. "Norwegian Bokm\u00E5l_Norway.utf8".
+ __locale_all = _wcsdup(__locale::__wsetlocale(LC_ALL, nullptr));
if (__locale_all == nullptr)
std::__throw_bad_alloc();
__locale::__setlocale(LC_ALL, __l.__get_locale());
@@ -321,13 +331,13 @@ struct __locale_guard {
// for the different categories in the same format as returned by
// setlocale(LC_ALL, nullptr).
if (__locale_all != nullptr) {
- __locale::__setlocale(LC_ALL, __locale_all);
+ __locale::__wsetlocale(LC_ALL, __locale_all);
free(__locale_all);
}
_configthreadlocale(__status);
}
int __status;
- char* __locale_all = nullptr;
+ wchar_t* __locale_all = nullptr;
};
#endif // _LIBCPP_BUILDING_LIBRARY
diff --git a/libcxx/test/libcxx/localization/locales/windows.non_ascii_locale_names.pass.cpp b/libcxx/test/libcxx/localization/locales/windows.non_ascii_locale_names.pass.cpp
new file mode 100644
index 0000000000000..dbce82bb9ea99
--- /dev/null
+++ b/libcxx/test/libcxx/localization/locales/windows.non_ascii_locale_names.pass.cpp
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <locale>
+
+// REQUIRES: windows
+
+// The C RunTime library on Windows supports locale strings with
+// characters outside the ASCII range. This poses challenges for
+// code that temporarily sets a custom thread locale.
+//
+// https://github.com/llvm/llvm-project/issues/160478
+
+#include <iostream>
+#include <iomanip>
+#include <locale>
+#include <clocale>
+#include <cstdlib>
+#include <cassert>
+
+#include "test_macros.h"
+
+int main(int, char**) {
+ // Check that the C locale doesn't use the CP437 charset
+ LIBCPP_ASSERT(std::setlocale(LC_ALL, "Norwegian Bokm\x86l_Norway") == nullptr);
+
+ LIBCPP_ASSERT(std::setlocale(LC_ALL, ".437"));
+ LIBCPP_ASSERT(std::setlocale(LC_ALL, "Norwegian Bokm\x86l_Norway.437"));
+
+ std::cerr.imbue(std::locale::classic());
+ std::cerr << std::setprecision(2) << 0.1 << std::endl;
+
+ return EXIT_SUCCESS;
+}
More information about the libcxx-commits
mailing list