[libcxx-commits] [libcxx] Implement P1885R12: `<text_encoding>` header (PR #141312)
William Tran-Viet via libcxx-commits
libcxx-commits at lists.llvm.org
Wed May 28 21:00:09 PDT 2025
https://github.com/smallp-o-p updated https://github.com/llvm/llvm-project/pull/141312
>From 4ecef4f3d68fac70395e621a6a4e578f97769dcf Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Fri, 23 May 2025 22:08:36 -0400
Subject: [PATCH 01/54] Implement P1885R12
---
libcxx/docs/FeatureTestMacroTable.rst | 2 +-
libcxx/docs/Status/Cxx2cPapers.csv | 4 +-
libcxx/include/CMakeLists.txt | 2 +
libcxx/include/__locale | 9 +
.../include/__text_encoding/text_encoding.h | 1483 +++++++++++++++++
libcxx/include/module.modulemap.in | 7 +
libcxx/include/text_encoding | 68 +
libcxx/include/version | 2 +-
libcxx/modules/std.compat.cppm.in | 3 -
libcxx/modules/std.cppm.in | 6 +-
libcxx/modules/std/text_encoding.inc | 9 +-
libcxx/src/CMakeLists.txt | 1 +
libcxx/src/locale.cpp | 13 +
libcxx/src/text_encoding.cpp | 49 +
.../test/libcxx/transitive_includes/cxx26.csv | 15 +
.../text_encoding.version.compile.pass.cpp | 63 +
.../version.version.compile.pass.cpp | 16 +-
.../locale/locale.members/encoding.pass.cpp | 56 +
.../text_encoding.ctor/default.pass.cpp | 39 +
.../text_encoding.ctor/id.pass.cpp | 56 +
.../text_encoding.ctor/string_view.pass.cpp | 73 +
.../text_encoding.eq/equal.id.pass.cpp | 69 +
.../text_encoding.eq/equal.pass.cpp | 66 +
.../text_encoding.members/aliases.pass.cpp | 37 +
.../environment.pass.cpp | 83 +
.../text_encoding.members/literal.pass.cpp | 49 +
.../text_encoding.aliases_view/begin.pass.cpp | 66 +
.../text_encoding.aliases_view/empty.pass.cpp | 64 +
.../text_encoding.aliases_view/front.pass.cpp | 66 +
libcxx/test/support/test_text_encoding.h | 1173 +++++++++++++
.../generate_feature_test_macro_components.py | 1 -
libcxx/utils/libcxx/header_information.py | 3 +-
32 files changed, 3624 insertions(+), 29 deletions(-)
create mode 100644 libcxx/include/__text_encoding/text_encoding.h
create mode 100644 libcxx/include/text_encoding
create mode 100644 libcxx/src/text_encoding.cpp
create mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp
create mode 100644 libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.ctor/default.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.ctor/string_view.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
create mode 100644 libcxx/test/support/test_text_encoding.h
diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst
index 9b57b7c8eeb52..93308e4078075 100644
--- a/libcxx/docs/FeatureTestMacroTable.rst
+++ b/libcxx/docs/FeatureTestMacroTable.rst
@@ -500,7 +500,7 @@ Status
---------------------------------------------------------- -----------------
``__cpp_lib_submdspan`` *unimplemented*
---------------------------------------------------------- -----------------
- ``__cpp_lib_text_encoding`` *unimplemented*
+ ``__cpp_lib_text_encoding`` ``202306L``
---------------------------------------------------------- -----------------
``__cpp_lib_to_chars`` *unimplemented*
---------------------------------------------------------- -----------------
diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv
index 3809446a57896..a7dfa75df7c87 100644
--- a/libcxx/docs/Status/Cxx2cPapers.csv
+++ b/libcxx/docs/Status/Cxx2cPapers.csv
@@ -13,7 +13,7 @@
"`P2013R5 <https://wg21.link/P2013R5>`__","Freestanding Language: Optional ``::operator new``","2023-06 (Varna)","","",""
"`P2363R5 <https://wg21.link/P2363R5>`__","Extending associative containers with the remaining heterogeneous overloads","2023-06 (Varna)","","",""
"`P1901R2 <https://wg21.link/P1901R2>`__","Enabling the Use of ``weak_ptr`` as Keys in Unordered Associative Containers","2023-06 (Varna)","","",""
-"`P1885R12 <https://wg21.link/P1885R12>`__","Naming Text Encodings to Demystify Them","2023-06 (Varna)","","",""
+"`P1885R12 <https://wg21.link/P1885R12>`__","Naming Text Encodings to Demystify Them","2023-06 (Varna)","|Complete|","21",""
"`P0792R14 <https://wg21.link/P0792R14>`__","``function_ref``: a type-erased callable reference","2023-06 (Varna)","","",""
"`P2874R2 <https://wg21.link/P2874R2>`__","P2874R2: Mandating Annex D Require No More","2023-06 (Varna)","|Complete|","12",""
"`P2757R3 <https://wg21.link/P2757R3>`__","Type-checking format args","2023-06 (Varna)","","",""
@@ -79,7 +79,7 @@
"`P3136R1 <https://wg21.link/P3136R1>`__","Retiring niebloids","2024-11 (Wrocław)","|Complete|","14",""
"`P3138R5 <https://wg21.link/P3138R5>`__","``views::cache_latest``","2024-11 (Wrocław)","","",""
"`P3379R0 <https://wg21.link/P3379R0>`__","Constrain ``std::expected`` equality operators","2024-11 (Wrocław)","|Complete|","21",""
-"`P2862R1 <https://wg21.link/P2862R1>`__","``text_encoding::name()`` should never return null values","2024-11 (Wrocław)","","",""
+"`P2862R1 <https://wg21.link/P2862R1>`__","``text_encoding::name()`` should never return null values","2024-11 (Wrocław)","|Complete|","21",""
"`P2897R7 <https://wg21.link/P2897R7>`__","``aligned_accessor``: An ``mdspan`` accessor expressing pointer over-alignment","2024-11 (Wrocław)","|Complete|","21",""
"`P3355R1 <https://wg21.link/P3355R1>`__","Fix ``submdspan`` for C++26","2024-11 (Wrocław)","","",""
"`P3222R0 <https://wg21.link/P3222R0>`__","Fix C++26 by adding transposed special cases for P2642 layouts","2024-11 (Wrocław)","","",""
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 43cefd5600646..ba61ee7c11e35 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -751,6 +751,7 @@ set(files
__system_error/error_condition.h
__system_error/system_error.h
__system_error/throw_system_error.h
+ __text_encoding/text_encoding.h
__thread/formatter.h
__thread/id.h
__thread/jthread.h
@@ -1062,6 +1063,7 @@ set(files
strstream
syncstream
system_error
+ text_encoding
tgmath.h
thread
tuple
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index d6c6ef19627ff..4da3f38ac408f 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -31,6 +31,10 @@
# include <cstddef>
# include <cstring>
+# if _LIBCPP_STD_VER >= 26
+# include <__text_encoding/text_encoding.h>
+# endif
+
# if _LIBCPP_HAS_WIDE_CHARACTERS
# include <cwchar>
# else
@@ -99,6 +103,11 @@ public:
// locale operations:
string name() const;
+
+# if _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
+ text_encoding encoding() const;
+# endif // _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
+
bool operator==(const locale&) const;
# if _LIBCPP_STD_VER <= 17
_LIBCPP_HIDE_FROM_ABI bool operator!=(const locale& __y) const { return !(*this == __y); }
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
new file mode 100644
index 0000000000000..93d0ae2ab6b89
--- /dev/null
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -0,0 +1,1483 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_H
+#define _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_HAS_LOCALIZATION
+
+#include <__algorithm/copy_n.h>
+#include <__algorithm/lower_bound.h>
+#include <__algorithm/min.h>
+#include <__functional/hash.h>
+#include <__iterator/iterator_traits.h>
+#include <__locale_dir/locale_base_api.h>
+#include <__ranges/view_interface.h>
+#include <__string/char_traits.h>
+#include <__utility/unreachable.h>
+#include <cstdint>
+#include <string_view>
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+#if _LIBCPP_STD_VER >= 26
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
+ static constexpr size_t max_name_length = 63;
+
+private:
+ struct __encoding_data { // one row of __text_encoding_data: a numeric id paired with one of its names
+ using __id_rep _LIBCPP_NODEBUG = int_least32_t; // also the underlying type of text_encoding::id
+ __id_rep __mib_rep; // numeric id of the encoding this row belongs to
+ const char* __name; // one name for that id; "" in the two leading rows (ids 1 and 2)
+
+ friend constexpr bool operator==(const __encoding_data& __e, const __encoding_data& __other) _NOEXCEPT {
+ return __e.__mib_rep == __other.__mib_rep || __comp_name(__e.__name, __other.__name); // same id OR matching names
+ }
+
+ friend constexpr bool operator<(const __encoding_data& __e, const __id_rep __i) _NOEXCEPT {
+ return __e.__mib_rep < __i; // row-vs-id ordering used by the lower_bound call in __find_encoding_data_by_id
+ }
+ };
+
+public:
+ enum class id : __encoding_data::__id_rep { // values are sparse; each one is the id stored in the table rows
+ other = 1,
+ unknown = 2,
+ ASCII = 3,
+ ISOLatin1 = 4,
+ ISOLatin2 = 5,
+ ISOLatin3 = 6,
+ ISOLatin4 = 7,
+ ISOLatinCyrillic = 8,
+ ISOLatinArabic = 9,
+ ISOLatinGreek = 10,
+ ISOLatinHebrew = 11,
+ ISOLatin5 = 12,
+ ISOLatin6 = 13,
+ ISOTextComm = 14,
+ HalfWidthKatakana = 15,
+ JISEncoding = 16,
+ ShiftJIS = 17,
+ EUCPkdFmtJapanese = 18,
+ EUCFixWidJapanese = 19,
+ ISO4UnitedKingdom = 20,
+ ISO11SwedishForNames = 21,
+ ISO15Italian = 22,
+ ISO17Spanish = 23,
+ ISO21German = 24,
+ ISO60DanishNorwegian = 25,
+ ISO69French = 26,
+ ISO10646UTF1 = 27,
+ ISO646basic1983 = 28,
+ INVARIANT = 29,
+ ISO2IntlRefVersion = 30,
+ NATSSEFI = 31,
+ NATSSEFIADD = 32,
+ NATSDANO = 33,
+ NATSDANOADD = 34,
+ ISO10Swedish = 35,
+ KSC56011987 = 36,
+ ISO2022KR = 37,
+ EUCKR = 38,
+ ISO2022JP = 39,
+ ISO2022JP2 = 40,
+ ISO13JISC6220jp = 41,
+ ISO14JISC6220ro = 42,
+ ISO16Portuguese = 43,
+ ISO18Greek7Old = 44,
+ ISO19LatinGreek = 45,
+ ISO25French = 46,
+ ISO27LatinGreek1 = 47,
+ ISO5427Cyrillic = 48,
+ ISO42JISC62261978 = 49,
+ ISO47BSViewdata = 50,
+ ISO49INIS = 51,
+ ISO50INIS8 = 52,
+ ISO51INISCyrillic = 53,
+ ISO54271981 = 54,
+ ISO5428Greek = 55,
+ ISO57GB1988 = 56,
+ ISO58GB231280 = 57,
+ ISO61Norwegian2 = 58,
+ ISO70VideotexSupp1 = 59,
+ ISO84Portuguese2 = 60,
+ ISO85Spanish2 = 61,
+ ISO86Hungarian = 62,
+ ISO87JISX0208 = 63,
+ ISO88Greek7 = 64,
+ ISO89ASMO449 = 65,
+ ISO90 = 66,
+ ISO91JISC62291984a = 67,
+ ISO92JISC62991984b = 68,
+ ISO93JIS62291984badd = 69,
+ ISO94JIS62291984hand = 70,
+ ISO95JIS62291984handadd = 71,
+ ISO96JISC62291984kana = 72,
+ ISO2033 = 73,
+ ISO99NAPLPS = 74,
+ ISO102T617bit = 75,
+ ISO103T618bit = 76,
+ ISO111ECMACyrillic = 77,
+ ISO121Canadian1 = 78,
+ ISO122Canadian2 = 79,
+ ISO123CSAZ24341985gr = 80,
+ ISO88596E = 81,
+ ISO88596I = 82,
+ ISO128T101G2 = 83,
+ ISO88598E = 84,
+ ISO88598I = 85,
+ ISO139CSN369103 = 86,
+ ISO141JUSIB1002 = 87,
+ ISO143IECP271 = 88,
+ ISO146Serbian = 89,
+ ISO147Macedonian = 90,
+ ISO150 = 91,
+ ISO151Cuba = 92,
+ ISO6937Add = 93,
+ ISO153GOST1976874 = 94,
+ ISO8859Supp = 95,
+ ISO10367Box = 96,
+ ISO158Lap = 97,
+ ISO159JISX02121990 = 98,
+ ISO646Danish = 99,
+ USDK = 100,
+ DKUS = 101,
+ KSC5636 = 102,
+ Unicode11UTF7 = 103,
+ ISO2022CN = 104,
+ ISO2022CNEXT = 105,
+ UTF8 = 106,
+ ISO885913 = 109, // 107 and 108 have no enumerator
+ ISO885914 = 110,
+ ISO885915 = 111,
+ ISO885916 = 112,
+ GBK = 113,
+ GB18030 = 114,
+ OSDEBCDICDF0415 = 115,
+ OSDEBCDICDF03IRV = 116,
+ OSDEBCDICDF041 = 117,
+ ISO115481 = 118,
+ KZ1048 = 119,
+ UCS2 = 1000, // numbering jumps from 119 to 1000
+ UCS4 = 1001,
+ UnicodeASCII = 1002,
+ UnicodeLatin1 = 1003,
+ UnicodeJapanese = 1004,
+ UnicodeIBM1261 = 1005,
+ UnicodeIBM1268 = 1006,
+ UnicodeIBM1276 = 1007,
+ UnicodeIBM1264 = 1008,
+ UnicodeIBM1265 = 1009,
+ Unicode11 = 1010,
+ SCSU = 1011,
+ UTF7 = 1012,
+ UTF16BE = 1013,
+ UTF16LE = 1014,
+ UTF16 = 1015,
+ CESU8 = 1016,
+ UTF32 = 1017,
+ UTF32BE = 1018,
+ UTF32LE = 1019,
+ BOCU1 = 1020,
+ UTF7IMAP = 1021,
+ Windows30Latin1 = 2000, // numbering jumps from 1021 to 2000
+ Windows31Latin1 = 2001,
+ Windows31Latin2 = 2002,
+ Windows31Latin5 = 2003,
+ HPRoman8 = 2004,
+ AdobeStandardEncoding = 2005,
+ VenturaUS = 2006,
+ VenturaInternational = 2007,
+ DECMCS = 2008,
+ PC850Multilingual = 2009,
+ PC8DanishNorwegian = 2012,
+ PC862LatinHebrew = 2013,
+ PC8Turkish = 2014,
+ IBMSymbols = 2015,
+ IBMThai = 2016,
+ HPLegal = 2017,
+ HPPiFont = 2018,
+ HPMath8 = 2019,
+ HPPSMath = 2020,
+ HPDesktop = 2021,
+ VenturaMath = 2022,
+ MicrosoftPublishing = 2023,
+ Windows31J = 2024,
+ GB2312 = 2025,
+ Big5 = 2026,
+ Macintosh = 2027,
+ IBM037 = 2028,
+ IBM038 = 2029,
+ IBM273 = 2030,
+ IBM274 = 2031,
+ IBM275 = 2032,
+ IBM277 = 2033,
+ IBM278 = 2034,
+ IBM280 = 2035,
+ IBM281 = 2036,
+ IBM284 = 2037,
+ IBM285 = 2038,
+ IBM290 = 2039,
+ IBM297 = 2040,
+ IBM420 = 2041,
+ IBM423 = 2042,
+ IBM424 = 2043,
+ PC8CodePage437 = 2011, // listed out of numeric order
+ IBM500 = 2044,
+ IBM851 = 2045,
+ PCp852 = 2010, // listed out of numeric order
+ IBM855 = 2046,
+ IBM857 = 2047,
+ IBM860 = 2048,
+ IBM861 = 2049,
+ IBM863 = 2050,
+ IBM864 = 2051,
+ IBM865 = 2052,
+ IBM868 = 2053,
+ IBM869 = 2054,
+ IBM870 = 2055,
+ IBM871 = 2056,
+ IBM880 = 2057,
+ IBM891 = 2058,
+ IBM903 = 2059,
+ IBBM904 = 2060, // NOTE(review): looks like a typo, but the spelling is presumably dictated by the standard; confirm before changing
+ IBM905 = 2061,
+ IBM918 = 2062,
+ IBM1026 = 2063,
+ IBMEBCDICATDE = 2064,
+ EBCDICATDEA = 2065,
+ EBCDICCAFR = 2066,
+ EBCDICDKNO = 2067,
+ EBCDICDKNOA = 2068,
+ EBCDICFISE = 2069,
+ EBCDICFISEA = 2070,
+ EBCDICFR = 2071,
+ EBCDICIT = 2072,
+ EBCDICPT = 2073,
+ EBCDICES = 2074,
+ EBCDICESA = 2075,
+ EBCDICESS = 2076,
+ EBCDICUK = 2077,
+ EBCDICUS = 2078,
+ Unknown8BiT = 2079, // NOTE(review): unusual capitalisation, presumably dictated by the standard; confirm before changing
+ Mnemonic = 2080,
+ Mnem = 2081,
+ VISCII = 2082,
+ VIQR = 2083,
+ KOI8R = 2084,
+ HZGB2312 = 2085,
+ IBM866 = 2086,
+ PC775Baltic = 2087,
+ KOI8U = 2088,
+ IBM00858 = 2089,
+ IBM00924 = 2090,
+ IBM01140 = 2091,
+ IBM01141 = 2092,
+ IBM01142 = 2093,
+ IBM01143 = 2094,
+ IBM01144 = 2095,
+ IBM01145 = 2096,
+ IBM01146 = 2097,
+ IBM01147 = 2098,
+ IBM01148 = 2099,
+ IBM01149 = 2100,
+ Big5HKSCS = 2101,
+ IBM1047 = 2102,
+ PTCP154 = 2103,
+ Amiga1251 = 2104,
+ KOI7switched = 2105,
+ BRF = 2106,
+ TSCII = 2107,
+ CP51932 = 2108,
+ windows874 = 2109,
+ windows1250 = 2250, // numbering jumps from 2109 to 2250
+ windows1251 = 2251,
+ windows1252 = 2252,
+ windows1253 = 2253,
+ windows1254 = 2254,
+ windows1255 = 2255,
+ windows1256 = 2256,
+ windows1257 = 2257,
+ windows1258 = 2258,
+ TIS620 = 2259,
+ CP50220 = 2260,
+ reserved = 3000
+ };
+
+ using enum id;
+
+ _LIBCPP_HIDE_FROM_ABI constexpr text_encoding() = default; // state comes from in-class member initializers declared outside this excerpt -- TODO confirm
+ _LIBCPP_HIDE_FROM_ABI constexpr explicit text_encoding(string_view __enc) _NOEXCEPT
+ : __encoding_rep_(__find_encoding_data(__enc)) { // table row whose name matches __enc, or the `other` row if none does
+ __enc.copy(__name_, max_name_length, 0); // keep the caller's spelling; copies at most max_name_length characters
+ }
+ _LIBCPP_HIDE_FROM_ABI constexpr text_encoding(id __i) _NOEXCEPT : __encoding_rep_(__find_encoding_data_by_id(__i)) {
+   const char* const __primary = __encoding_rep_->__name; // name stored in the row found for __i
+   if (*__primary) // rows with an empty name leave __name_ untouched
+     std::copy_n(__primary, std::char_traits<char>::length(__primary), __name_);
+ }
+
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr id mib() const _NOEXCEPT { return id(__encoding_rep_->__mib_rep); } // id of the table row this object refers to
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const char* name() const _NOEXCEPT { return __name_; } // the buffer filled in by the constructors above
+
+ // [text.encoding.aliases], class text_encoding::aliases_view
+ struct aliases_view : ranges::view_interface<aliases_view> { // range over the table rows (names) that share one id
+   _LIBCPP_HIDE_FROM_ABI constexpr aliases_view() = default;
+   _LIBCPP_HIDE_FROM_ABI constexpr aliases_view(const __encoding_data* __d) : __view_data_(__d) {}
+   struct __end_sentinel {}; // tag returned by end(); an iterator equals it once it can no longer be dereferenced
+   struct __iterator {
+     using value_type = const char*;
+     using reference = const char*;
+     using difference_type = ptrdiff_t;
+
+     _LIBCPP_HIDE_FROM_ABI constexpr __iterator() noexcept = default;
+
+     _LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const {
+       if (__can_dereference())
+         return __data_->__name;
+       std::unreachable(); // dereferencing an iterator that is outside its encoding's rows is undefined
+     }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr value_type operator[](difference_type __n) const {
+       auto __it = *this;
+       return *(__it + __n);
+     }
+
+     _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __it, difference_type __n) {
+       __it += __n;
+       return __it;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __it) {
+       __it += __n;
+       return __it;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __it, difference_type __n) {
+       __it -= __n;
+       return __it;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr difference_type operator-(const __iterator& __other) const {
+       // Distance measured in table rows; only defined for two iterators over the same encoding.
+       if (__other.__mib_rep_ == __mib_rep_)
+         return __data_ - __other.__data_;
+       std::unreachable();
+     }
+
+     _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(difference_type __n, __iterator& __it) {
+       __it -= __n; // NOTE(review): mutates the caller's iterator and yields it - n, not n - it; confirm this overload is wanted
+       return __it;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator++() {
+       __data_++;
+       return *this;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator++(int) {
+       auto __old = *this;
+       __data_++;
+       return __old;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator--() {
+       __data_--;
+       return *this;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr __iterator operator--(int) {
+       auto __old = *this;
+       __data_--;
+       return __old;
+     }
+
+     // Moves by __n rows; if that would leave the table or the run of rows sharing this id, resets to the sentinel
+     // (default-constructed) state. The forward check tests row [__n - 1], so landing one past the run is permitted.
+     _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __n) {
+       if (__data_) [[__likely__]] {
+         if (__n > 0) {
+           if ((__data_ + __n) < std::end(__text_encoding_data) && __data_[__n - 1].__mib_rep == __mib_rep_)
+             __data_ += __n;
+           else
+             *this = __iterator{};
+         } else if (__n < 0) {
+           if ((__data_ + __n) > __text_encoding_data && __data_[__n].__mib_rep == __mib_rep_)
+             __data_ += __n;
+           else
+             *this = __iterator{};
+         }
+       }
+       return *this;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator-=(difference_type __n) { return operator+=(-__n); }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr bool operator==(const __iterator& __it) const {
+       return __data_ == __it.__data_ && __it.__mib_rep_ == __mib_rep_;
+     }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr bool operator==(__end_sentinel) const { return !__can_dereference(); }
+
+     _LIBCPP_HIDE_FROM_ABI constexpr auto operator<=>(__iterator __it) const { return __data_ <=> __it.__data_; }
+
+   private:
+     friend struct text_encoding;
+
+     _LIBCPP_HIDE_FROM_ABI constexpr __iterator(const __encoding_data* __enc_d) noexcept
+         : __data_(__enc_d), __mib_rep_(__enc_d ? __enc_d->__mib_rep : 0) {}
+
+     _LIBCPP_HIDE_FROM_ABI constexpr bool __can_dereference() const { return __data_ && __data_->__mib_rep == __mib_rep_; }
+
+     // default iterator is a sentinel
+     const __encoding_data* __data_ = nullptr; // current row, or nullptr for the sentinel state
+     __encoding_data::__id_rep __mib_rep_ = 0; // id captured at construction; rows with another id are out of range
+   };
+
+   _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const { return __iterator{__view_data_}; }
+   _LIBCPP_HIDE_FROM_ABI constexpr __end_sentinel end() const { return __end_sentinel{}; }
+
+ private:
+   const __encoding_data* __view_data_ = nullptr; // first row of the viewed encoding; nullptr gives an empty view
+ };
+
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr aliases_view aliases() const _NOEXCEPT {
+   // Rows with an empty name (ids 1 and 2 at the head of the table) have no aliases: return an empty view.
+   if (__encoding_rep_->__name[0] == '\0')
+     return aliases_view(nullptr);
+
+   // Rewind to the first row that shares this id, never stepping before the table, so the view starts there.
+   auto __first = __encoding_rep_;
+   while (__first > std::begin(__text_encoding_data) && __first[-1].__mib_rep == __encoding_rep_->__mib_rep)
+     --__first;
+   return aliases_view(__first);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const text_encoding& __a, const text_encoding& __b) _NOEXCEPT {
+   const id __lhs = __a.mib(), __rhs = __b.mib(); // ids decide, except two `other` encodings compare by name
+   if (__lhs != __rhs)
+     return false;
+   return __lhs != id::other || __comp_name(__a.__name_, __b.__name_);
+ }
+
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const text_encoding& __encoding, id __i) _NOEXCEPT {
+ return __encoding.mib() == __i; // compares ids only; the stored name is not consulted
+ }
+
+# if __CHAR_BIT__ == 8
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static consteval text_encoding literal() _NOEXCEPT {
+# ifdef __GNUC_EXECUTION_CHARSET_NAME // first choice: the charset name macro, looked up by name
+ return text_encoding(__GNUC_EXECUTION_CHARSET_NAME);
+# elif defined(__clang_literal_encoding__) // second choice: Clang's literal-encoding macro, looked up by name
+ return text_encoding(__clang_literal_encoding__);
+# elif defined(__clang__) // Clang without either macro: report UTF-8
+ return text_encoding(id::UTF8);
+# else // nothing known about the compiler: default-constructed encoding
+ return {};
+# endif
+ }
+
+ [[nodiscard]] static text_encoding environment(); // declared only; no definition appears in this excerpt
+
+ template <id __i>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static bool environment_is() {
+ return environment() == __i; // calls environment() on every use and compares ids
+ }
+
+# else // __CHAR_BIT__ != 8: the three queries are declared but deleted
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static consteval text_encoding literal() = delete;
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static text_encoding environment() = delete;
+ template <id __i>
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static bool environment_is() = delete;
+# endif // __CHAR_BIT__ == 8
+
+private:
+ _LIBCPP_HIDE_FROM_ABI static constexpr bool __comp_name(string_view __a, string_view __b) {
+   // Loose name comparison: case-insensitive, ignores every non-alphanumeric character, and ignores any '0'
+   // that does not continue a number. An empty name never matches anything, not even another empty name.
+   if (__a.empty() || __b.empty())
+     return false;
+
+   // Maps one character to its comparison value; 255 means "skip this character". __in_number records whether
+   // the last significant character was a digit, so skipped separators must leave it untouched ("9-0" keeps 0).
+   auto __map_char = [](char __n, bool& __in_number) -> unsigned char {
+     if (__n == '0')
+       return __in_number ? '0' : 255;
+     const bool __is_digit = __n >= '1' && __n <= '9';
+     const bool __is_upper = __n >= 'A' && __n <= 'Z';
+     if (!__is_digit && !__is_upper && !(__n >= 'a' && __n <= 'z'))
+       return 255;
+     __in_number = __is_digit;
+     return static_cast<unsigned char>(__is_upper ? __n + ('a' - 'A') : __n);
+   };
+
+   auto __a_ptr = __a.begin(), __b_ptr = __b.begin();
+   bool __a_in_number = false, __b_in_number = false;
+
+   unsigned char __a_val = 255, __b_val = 255;
+   for (;; __a_ptr++, __b_ptr++) {
+     // Test for the end before dereferencing: reading *end() of a string_view is out of bounds.
+     while (__a_ptr != __a.end() && (__a_val = __map_char(*__a_ptr, __a_in_number)) == 255)
+       __a_ptr++;
+     while (__b_ptr != __b.end() && (__b_val = __map_char(*__b_ptr, __b_in_number)) == 255)
+       __b_ptr++;
+
+     if (__a_ptr == __a.end())
+       return __b_ptr == __b.end(); // equal only if both names ran out together
+     if (__b_ptr == __b.end() || __a_val != __b_val)
+       return false;
+   }
+ }
+
+ _LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data(string_view __a) {
+   // Scan the named rows only: the first two rows have empty names and the final row is never examined.
+   const __encoding_data* const __stop = std::end(__text_encoding_data) - 1;
+   for (const __encoding_data* __row = __text_encoding_data + 2; __row != __stop; ++__row) {
+     if (!__comp_name(__a, __row->__name))
+       continue;
+     // Matched one of this encoding's names: step back to the first row carrying the same id.
+     const __encoding_data* __head = __row;
+     while (__head[-1].__mib_rep == __row->__mib_rep)
+       --__head;
+     return __head;
+   }
+   return __text_encoding_data; // other
+ }
+
+ _LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data_by_id(id __i) {
+   const auto __rep = __encoding_data::__id_rep(__i); // lower_bound only guarantees id >= __rep; exact match is re-checked
+   auto __found = std::lower_bound(std::begin(__text_encoding_data), std::end(__text_encoding_data) - 1, __rep);
+   const bool __hit = __found != std::end(__text_encoding_data) - 1 && __found->__mib_rep == __rep; // absent ids must not map to the next row
+   return __hit ? __found : __text_encoding_data + 1; // unknown
+ }
+
+ _LIBCPP_HIDE_FROM_ABI static constexpr __encoding_data __text_encoding_data[] = {
+ {1, ""},
+ {2, ""},
+ {3, "ANSI_X3.4-1968"},
+ {3, "ANSI_X3.4-1986"},
+ {3, "IBM367"},
+ {3, "ISO646-US"},
+ {3, "ISO_646.irv:1991"},
+ {3, "cp367"},
+ {3, "csASCII"},
+ {3, "iso-ir-6"},
+ {3, "us"},
+ {4, "ISO-8859-1"},
+ {4, "ISO_8859-1:1987"},
+ {4, "CP819"},
+ {4, "IBM819"},
+ {4, "ISO_8859-1"},
+ {4, "csISOLatin1"},
+ {4, "iso-ir-100"},
+ {4, "l1"},
+ {4, "latin1"},
+ {5, "ISO-8859-2"},
+ {5, "ISO_8859-2:1987"},
+ {5, "ISO_8859-2"},
+ {5, "csISOLatin2"},
+ {5, "iso-ir-101"},
+ {5, "l2"},
+ {5, "latin2"},
+ {6, "ISO-8859-3"},
+ {6, "ISO_8859-3:1988"},
+ {6, "ISO_8859-3"},
+ {6, "csISOLatin3"},
+ {6, "iso-ir-109"},
+ {6, "l3"},
+ {6, "latin3"},
+ {7, "ISO-8859-4"},
+ {7, "ISO_8859-4:1988"},
+ {7, "ISO_8859-4"},
+ {7, "csISOLatin4"},
+ {7, "iso-ir-110"},
+ {7, "l4"},
+ {7, "latin4"},
+ {8, "ISO-8859-5"},
+ {8, "ISO_8859-5:1988"},
+ {8, "ISO_8859-5"},
+ {8, "csISOLatinCyrillic"},
+ {8, "cyrillic"},
+ {8, "iso-ir-144"},
+ {9, "ISO-8859-6"},
+ {9, "ISO_8859-6:1987"},
+ {9, "ASMO-708"},
+ {9, "ECMA-114"},
+ {9, "ISO_8859-6"},
+ {9, "arabic"},
+ {9, "csISOLatinArabic"},
+ {9, "iso-ir-127"},
+ {10, "ISO-8859-7"},
+ {10, "ISO_8859-7:1987"},
+ {10, "ECMA-118"},
+ {10, "ELOT_928"},
+ {10, "ISO_8859-7"},
+ {10, "csISOLatinGreek"},
+ {10, "greek"},
+ {10, "greek8"},
+ {10, "iso-ir-126"},
+ {11, "ISO-8859-8"},
+ {11, "ISO_8859-8:1988"},
+ {11, "ISO_8859-8"},
+ {11, "csISOLatinHebrew"},
+ {11, "hebrew"},
+ {11, "iso-ir-138"},
+ {12, "ISO-8859-9"},
+ {12, "ISO_8859-9:1989"},
+ {12, "ISO_8859-9"},
+ {12, "csISOLatin5"},
+ {12, "iso-ir-148"},
+ {12, "l5"},
+ {12, "latin5"},
+ {13, "ISO-8859-10"},
+ {13, "ISO_8859-10:1992"},
+ {13, "csISOLatin6"},
+ {13, "iso-ir-157"},
+ {13, "l6"},
+ {13, "latin6"},
+ {14, "ISO_6937-2-add"},
+ {14, "csISOTextComm"},
+ {14, "iso-ir-142"},
+ {15, "JIS_X0201"},
+ {15, "X0201"},
+ {15, "csHalfWidthKatakana"},
+ {16, "JIS_Encoding"},
+ {16, "csJISEncoding"},
+ {17, "Shift_JIS"},
+ {17, "MS_Kanji"},
+ {17, "csShiftJIS"},
+ {18, "EUC-JP"},
+ {18, "Extended_UNIX_Code_Packed_Format_for_Japanese"},
+ {18, "csEUCPkdFmtJapanese"},
+ {19, "Extended_UNIX_Code_Fixed_Width_for_Japanese"},
+ {19, "csEUCFixWidJapanese"},
+ {20, "BS_4730"},
+ {20, "ISO646-GB"},
+ {20, "csISO4UnitedKingdom"},
+ {20, "gb"},
+ {20, "iso-ir-4"},
+ {20, "uk"},
+ {21, "SEN_850200_C"},
+ {21, "ISO646-SE2"},
+ {21, "csISO11SwedishForNames"},
+ {21, "iso-ir-11"},
+ {21, "se2"},
+ {22, "IT"},
+ {22, "ISO646-IT"},
+ {22, "csISO15Italian"},
+ {22, "iso-ir-15"},
+ {23, "ES"},
+ {23, "ISO646-ES"},
+ {23, "csISO17Spanish"},
+ {23, "iso-ir-17"},
+ {24, "DIN_66003"},
+ {24, "ISO646-DE"},
+ {24, "csISO21German"},
+ {24, "de"},
+ {24, "iso-ir-21"},
+ {25, "NS_4551-1"},
+ {25, "ISO646-NO"},
+ {25, "csISO60DanishNorwegian"},
+ {25, "csISO60Norwegian1"},
+ {25, "iso-ir-60"},
+ {25, "no"},
+ {26, "NF_Z_62-010"},
+ {26, "ISO646-FR"},
+ {26, "csISO69French"},
+ {26, "fr"},
+ {26, "iso-ir-69"},
+ {27, "ISO-10646-UTF-1"},
+ {27, "csISO10646UTF1"},
+ {28, "ISO_646.basic:1983"},
+ {28, "csISO646basic1983"},
+ {28, "ref"},
+ {29, "INVARIANT"},
+ {29, "csINVARIANT"},
+ {30, "ISO_646.irv:1983"},
+ {30, "csISO2IntlRefVersion"},
+ {30, "irv"},
+ {30, "iso-ir-2"},
+ {31, "NATS-SEFI"},
+ {31, "csNATSSEFI"},
+ {31, "iso-ir-8-1"},
+ {32, "NATS-SEFI-ADD"},
+ {32, "csNATSSEFIADD"},
+ {32, "iso-ir-8-2"},
+ {33, "NATS-DANO"},
+ {33, "csNATSDANO"},
+ {33, "iso-ir-9-1"},
+ {34, "NATS-DANO-ADD"},
+ {34, "csNATSDANOADD"},
+ {34, "iso-ir-9-2"},
+ {35, "SEN_850200_B"},
+ {35, "FI"},
+ {35, "ISO646-FI"},
+ {35, "ISO646-SE"},
+ {35, "csISO10Swedish"},
+ {35, "iso-ir-10"},
+ {35, "se"},
+ {36, "KS_C_5601-1987"},
+ {36, "KSC_5601"},
+ {36, "KS_C_5601-1989"},
+ {36, "csKSC56011987"},
+ {36, "iso-ir-149"},
+ {36, "korean"},
+ {37, "ISO-2022-KR"},
+ {37, "csISO2022KR"},
+ {38, "EUC-KR"},
+ {38, "csEUCKR"},
+ {39, "ISO-2022-JP"},
+ {39, "csISO2022JP"},
+ {40, "ISO-2022-JP-2"},
+ {40, "csISO2022JP2"},
+ {41, "JIS_C6220-1969-jp"},
+ {41, "JIS_C6220-1969"},
+ {41, "csISO13JISC6220jp"},
+ {41, "iso-ir-13"},
+ {41, "katakana"},
+ {41, "x0201-7"},
+ {42, "JIS_C6220-1969-ro"},
+ {42, "ISO646-JP"},
+ {42, "csISO14JISC6220ro"},
+ {42, "iso-ir-14"},
+ {42, "jp"},
+ {43, "PT"},
+ {43, "ISO646-PT"},
+ {43, "csISO16Portuguese"},
+ {43, "iso-ir-16"},
+ {44, "greek7-old"},
+ {44, "csISO18Greek7Old"},
+ {44, "iso-ir-18"},
+ {45, "latin-greek"},
+ {45, "csISO19LatinGreek"},
+ {45, "iso-ir-19"},
+ {46, "NF_Z_62-010_(1973)"},
+ {46, "ISO646-FR1"},
+ {46, "csISO25French"},
+ {46, "iso-ir-25"},
+ {47, "Latin-greek-1"},
+ {47, "csISO27LatinGreek1"},
+ {47, "iso-ir-27"},
+ {48, "ISO_5427"},
+ {48, "csISO5427Cyrillic"},
+ {48, "iso-ir-37"},
+ {49, "JIS_C6226-1978"},
+ {49, "csISO42JISC62261978"},
+ {49, "iso-ir-42"},
+ {50, "BS_viewdata"},
+ {50, "csISO47BSViewdata"},
+ {50, "iso-ir-47"},
+ {51, "INIS"},
+ {51, "csISO49INIS"},
+ {51, "iso-ir-49"},
+ {52, "INIS-8"},
+ {52, "csISO50INIS8"},
+ {52, "iso-ir-50"},
+ {53, "INIS-cyrillic"},
+ {53, "csISO51INISCyrillic"},
+ {53, "iso-ir-51"},
+ {54, "ISO_5427:1981"},
+ {54, "ISO5427Cyrillic1981"},
+ {54, "csISO54271981"},
+ {54, "iso-ir-54"},
+ {55, "ISO_5428:1980"},
+ {55, "csISO5428Greek"},
+ {55, "iso-ir-55"},
+ {56, "GB_1988-80"},
+ {56, "ISO646-CN"},
+ {56, "cn"},
+ {56, "csISO57GB1988"},
+ {56, "iso-ir-57"},
+ {57, "GB_2312-80"},
+ {57, "chinese"},
+ {57, "csISO58GB231280"},
+ {57, "iso-ir-58"},
+ {58, "NS_4551-2"},
+ {58, "ISO646-NO2"},
+ {58, "csISO61Norwegian2"},
+ {58, "iso-ir-61"},
+ {58, "no2"},
+ {59, "videotex-suppl"},
+ {59, "csISO70VideotexSupp1"},
+ {59, "iso-ir-70"},
+ {60, "PT2"},
+ {60, "ISO646-PT2"},
+ {60, "csISO84Portuguese2"},
+ {60, "iso-ir-84"},
+ {61, "ES2"},
+ {61, "ISO646-ES2"},
+ {61, "csISO85Spanish2"},
+ {61, "iso-ir-85"},
+ {62, "MSZ_7795.3"},
+ {62, "ISO646-HU"},
+ {62, "csISO86Hungarian"},
+ {62, "hu"},
+ {62, "iso-ir-86"},
+ {63, "JIS_C6226-1983"},
+ {63, "JIS_X0208-1983"},
+ {63, "csISO87JISX0208"},
+ {63, "iso-ir-87"},
+ {63, "x0208"},
+ {64, "greek7"},
+ {64, "csISO88Greek7"},
+ {64, "iso-ir-88"},
+ {65, "ASMO_449"},
+ {65, "ISO_9036"},
+ {65, "arabic7"},
+ {65, "csISO89ASMO449"},
+ {65, "iso-ir-89"},
+ {66, "iso-ir-90"},
+ {66, "csISO90"},
+ {67, "JIS_C6229-1984-a"},
+ {67, "csISO91JISC62291984a"},
+ {67, "iso-ir-91"},
+ {67, "jp-ocr-a"},
+ {68, "JIS_C6229-1984-b"},
+ {68, "ISO646-JP-OCR-B"},
+ {68, "csISO92JISC62991984b"},
+ {68, "iso-ir-92"},
+ {68, "jp-ocr-b"},
+ {69, "JIS_C6229-1984-b-add"},
+ {69, "csISO93JIS62291984badd"},
+ {69, "iso-ir-93"},
+ {69, "jp-ocr-b-add"},
+ {70, "JIS_C6229-1984-hand"},
+ {70, "csISO94JIS62291984hand"},
+ {70, "iso-ir-94"},
+ {70, "jp-ocr-hand"},
+ {71, "JIS_C6229-1984-hand-add"},
+ {71, "csISO95JIS62291984handadd"},
+ {71, "iso-ir-95"},
+ {71, "jp-ocr-hand-add"},
+ {72, "JIS_C6229-1984-kana"},
+ {72, "csISO96JISC62291984kana"},
+ {72, "iso-ir-96"},
+ {73, "ISO_2033-1983"},
+ {73, "csISO2033"},
+ {73, "e13b"},
+ {73, "iso-ir-98"},
+ {74, "ANSI_X3.110-1983"},
+ {74, "CSA_T500-1983"},
+ {74, "NAPLPS"},
+ {74, "csISO99NAPLPS"},
+ {74, "iso-ir-99"},
+ {75, "T.61-7bit"},
+ {75, "csISO102T617bit"},
+ {75, "iso-ir-102"},
+ {76, "T.61-8bit"},
+ {76, "T.61"},
+ {76, "csISO103T618bit"},
+ {76, "iso-ir-103"},
+ {77, "ECMA-cyrillic"},
+ {77, "KOI8-E"},
+ {77, "csISO111ECMACyrillic"},
+ {77, "iso-ir-111"},
+ {78, "CSA_Z243.4-1985-1"},
+ {78, "ISO646-CA"},
+ {78, "ca"},
+ {78, "csISO121Canadian1"},
+ {78, "csa7-1"},
+ {78, "csa71"},
+ {78, "iso-ir-121"},
+ {79, "CSA_Z243.4-1985-2"},
+ {79, "ISO646-CA2"},
+ {79, "csISO122Canadian2"},
+ {79, "csa7-2"},
+ {79, "csa72"},
+ {79, "iso-ir-122"},
+ {80, "CSA_Z243.4-1985-gr"},
+ {80, "csISO123CSAZ24341985gr"},
+ {80, "iso-ir-123"},
+ {81, "ISO-8859-6-E"},
+ {81, "ISO_8859-6-E"},
+ {81, "csISO88596E"},
+ {82, "ISO-8859-6-I"},
+ {82, "ISO_8859-6-I"},
+ {82, "csISO88596I"},
+ {83, "T.101-G2"},
+ {83, "csISO128T101G2"},
+ {83, "iso-ir-128"},
+ {84, "ISO-8859-8-E"},
+ {84, "ISO_8859-8-E"},
+ {84, "csISO88598E"},
+ {85, "ISO-8859-8-I"},
+ {85, "ISO_8859-8-I"},
+ {85, "csISO88598I"},
+ {86, "CSN_369103"},
+ {86, "csISO139CSN369103"},
+ {86, "iso-ir-139"},
+ {87, "JUS_I.B1.002"},
+ {87, "ISO646-YU"},
+ {87, "csISO141JUSIB1002"},
+ {87, "iso-ir-141"},
+ {87, "js"},
+ {87, "yu"},
+ {88, "IEC_P27-1"},
+ {88, "csISO143IECP271"},
+ {88, "iso-ir-143"},
+ {89, "JUS_I.B1.003-serb"},
+ {89, "csISO146Serbian"},
+ {89, "iso-ir-146"},
+ {89, "serbian"},
+ {90, "JUS_I.B1.003-mac"},
+ {90, "csISO147Macedonian"},
+ {90, "iso-ir-147"},
+ {90, "macedonian"},
+ {91, "greek-ccitt"},
+ {91, "csISO150"},
+ {91, "csISO150GreekCCITT"},
+ {91, "iso-ir-150"},
+ {92, "NC_NC00-10:81"},
+ {92, "ISO646-CU"},
+ {92, "csISO151Cuba"},
+ {92, "cuba"},
+ {92, "iso-ir-151"},
+ {93, "ISO_6937-2-25"},
+ {93, "csISO6937Add"},
+ {93, "iso-ir-152"},
+ {94, "GOST_19768-74"},
+ {94, "ST_SEV_358-88"},
+ {94, "csISO153GOST1976874"},
+ {94, "iso-ir-153"},
+ {95, "ISO_8859-supp"},
+ {95, "csISO8859Supp"},
+ {95, "iso-ir-154"},
+ {95, "latin1-2-5"},
+ {96, "ISO_10367-box"},
+ {96, "csISO10367Box"},
+ {96, "iso-ir-155"},
+ {97, "latin-lap"},
+ {97, "csISO158Lap"},
+ {97, "iso-ir-158"},
+ {97, "lap"},
+ {98, "JIS_X0212-1990"},
+ {98, "csISO159JISX02121990"},
+ {98, "iso-ir-159"},
+ {98, "x0212"},
+ {99, "DS_2089"},
+ {99, "DS2089"},
+ {99, "ISO646-DK"},
+ {99, "csISO646Danish"},
+ {99, "dk"},
+ {100, "us-dk"},
+ {100, "csUSDK"},
+ {101, "dk-us"},
+ {101, "csDKUS"},
+ {102, "KSC5636"},
+ {102, "ISO646-KR"},
+ {102, "csKSC5636"},
+ {103, "UNICODE-1-1-UTF-7"},
+ {103, "csUnicode11UTF7"},
+ {104, "ISO-2022-CN"},
+ {104, "csISO2022CN"},
+ {105, "ISO-2022-CN-EXT"},
+ {105, "csISO2022CNEXT"},
+ {106, "UTF-8"},
+ {106, "csUTF8"},
+ {109, "ISO-8859-13"},
+ {109, "csISO885913"},
+ {110, "ISO-8859-14"},
+ {110, "ISO_8859-14"},
+ {110, "ISO_8859-14:1998"},
+ {110, "csISO885914"},
+ {110, "iso-celtic"},
+ {110, "iso-ir-199"},
+ {110, "l8"},
+ {110, "latin8"},
+ {111, "ISO-8859-15"},
+ {111, "ISO_8859-15"},
+ {111, "Latin-9"},
+ {111, "csISO885915"},
+ {112, "ISO-8859-16"},
+ {112, "ISO_8859-16"},
+ {112, "ISO_8859-16:2001"},
+ {112, "csISO885916"},
+ {112, "iso-ir-226"},
+ {112, "l10"},
+ {112, "latin10"},
+ {113, "GBK"},
+ {113, "CP936"},
+ {113, "MS936"},
+ {113, "csGBK"},
+ {113, "windows-936"},
+ {114, "GB18030"},
+ {114, "csGB18030"},
+ {115, "OSD_EBCDIC_DF04_15"},
+ {115, "csOSDEBCDICDF0415"},
+ {116, "OSD_EBCDIC_DF03_IRV"},
+ {116, "csOSDEBCDICDF03IRV"},
+ {117, "OSD_EBCDIC_DF04_1"},
+ {117, "csOSDEBCDICDF041"},
+ {118, "ISO-11548-1"},
+ {118, "ISO_11548-1"},
+ {118, "ISO_TR_11548-1"},
+ {118, "csISO115481"},
+ {119, "KZ-1048"},
+ {119, "RK1048"},
+ {119, "STRK1048-2002"},
+ {119, "csKZ1048"},
+ {1000, "ISO-10646-UCS-2"},
+ {1000, "csUnicode"},
+ {1001, "ISO-10646-UCS-4"},
+ {1001, "csUCS4"},
+ {1002, "ISO-10646-UCS-Basic"},
+ {1002, "csUnicodeASCII"},
+ {1003, "ISO-10646-Unicode-Latin1"},
+ {1003, "ISO-10646"},
+ {1003, "csUnicodeLatin1"},
+ {1004, "ISO-10646-J-1"},
+ {1004, "csUnicodeJapanese"},
+ {1005, "ISO-Unicode-IBM-1261"},
+ {1005, "csUnicodeIBM1261"},
+ {1006, "ISO-Unicode-IBM-1268"},
+ {1006, "csUnicodeIBM1268"},
+ {1007, "ISO-Unicode-IBM-1276"},
+ {1007, "csUnicodeIBM1276"},
+ {1008, "ISO-Unicode-IBM-1264"},
+ {1008, "csUnicodeIBM1264"},
+ {1009, "ISO-Unicode-IBM-1265"},
+ {1009, "csUnicodeIBM1265"},
+ {1010, "UNICODE-1-1"},
+ {1010, "csUnicode11"},
+ {1011, "SCSU"},
+ {1011, "csSCSU"},
+ {1012, "UTF-7"},
+ {1012, "csUTF7"},
+ {1013, "UTF-16BE"},
+ {1013, "csUTF16BE"},
+ {1014, "UTF-16LE"},
+ {1014, "csUTF16LE"},
+ {1015, "UTF-16"},
+ {1015, "csUTF16"},
+ {1016, "CESU-8"},
+ {1016, "csCESU-8"},
+ {1016, "csCESU8"},
+ {1017, "UTF-32"},
+ {1017, "csUTF32"},
+ {1018, "UTF-32BE"},
+ {1018, "csUTF32BE"},
+ {1019, "UTF-32LE"},
+ {1019, "csUTF32LE"},
+ {1020, "BOCU-1"},
+ {1020, "csBOCU-1"},
+ {1020, "csBOCU1"},
+ {1021, "UTF-7-IMAP"},
+ {1021, "csUTF7IMAP"},
+ {2000, "ISO-8859-1-Windows-3.0-Latin-1"},
+ {2000, "csWindows30Latin1"},
+ {2001, "ISO-8859-1-Windows-3.1-Latin-1"},
+ {2001, "csWindows31Latin1"},
+ {2002, "ISO-8859-2-Windows-Latin-2"},
+ {2002, "csWindows31Latin2"},
+ {2003, "ISO-8859-9-Windows-Latin-5"},
+ {2003, "csWindows31Latin5"},
+ {2004, "hp-roman8"},
+ {2004, "csHPRoman8"},
+ {2004, "r8"},
+ {2004, "roman8"},
+ {2005, "Adobe-Standard-Encoding"},
+ {2005, "csAdobeStandardEncoding"},
+ {2006, "Ventura-US"},
+ {2006, "csVenturaUS"},
+ {2007, "Ventura-International"},
+ {2007, "csVenturaInternational"},
+ {2008, "DEC-MCS"},
+ {2008, "csDECMCS"},
+ {2008, "dec"},
+ {2009, "IBM850"},
+ {2009, "850"},
+ {2009, "cp850"},
+ {2009, "csPC850Multilingual"},
+ {2010, "IBM852"},
+ {2010, "852"},
+ {2010, "cp852"},
+ {2010, "csPCp852"},
+ {2011, "IBM437"},
+ {2011, "437"},
+ {2011, "cp437"},
+ {2011, "csPC8CodePage437"},
+ {2012, "PC8-Danish-Norwegian"},
+ {2012, "csPC8DanishNorwegian"},
+ {2013, "IBM862"},
+ {2013, "862"},
+ {2013, "cp862"},
+ {2013, "csPC862LatinHebrew"},
+ {2014, "PC8-Turkish"},
+ {2014, "csPC8Turkish"},
+ {2015, "IBM-Symbols"},
+ {2015, "csIBMSymbols"},
+ {2016, "IBM-Thai"},
+ {2016, "csIBMThai"},
+ {2017, "HP-Legal"},
+ {2017, "csHPLegal"},
+ {2018, "HP-Pi-font"},
+ {2018, "csHPPiFont"},
+ {2019, "HP-Math8"},
+ {2019, "csHPMath8"},
+ {2020, "Adobe-Symbol-Encoding"},
+ {2020, "csHPPSMath"},
+ {2021, "HP-DeskTop"},
+ {2021, "csHPDesktop"},
+ {2022, "Ventura-Math"},
+ {2022, "csVenturaMath"},
+ {2023, "Microsoft-Publishing"},
+ {2023, "csMicrosoftPublishing"},
+ {2024, "Windows-31J"},
+ {2024, "csWindows31J"},
+ {2025, "GB2312"},
+ {2025, "csGB2312"},
+ {2026, "Big5"},
+ {2026, "csBig5"},
+ {2027, "macintosh"},
+ {2027, "csMacintosh"},
+ {2027, "mac"},
+ {2028, "IBM037"},
+ {2028, "cp037"},
+ {2028, "csIBM037"},
+ {2028, "ebcdic-cp-ca"},
+ {2028, "ebcdic-cp-nl"},
+ {2028, "ebcdic-cp-us"},
+ {2028, "ebcdic-cp-wt"},
+ {2029, "IBM038"},
+ {2029, "EBCDIC-INT"},
+ {2029, "cp038"},
+ {2029, "csIBM038"},
+ {2030, "IBM273"},
+ {2030, "CP273"},
+ {2030, "csIBM273"},
+ {2031, "IBM274"},
+ {2031, "CP274"},
+ {2031, "EBCDIC-BE"},
+ {2031, "csIBM274"},
+ {2032, "IBM275"},
+ {2032, "EBCDIC-BR"},
+ {2032, "cp275"},
+ {2032, "csIBM275"},
+ {2033, "IBM277"},
+ {2033, "EBCDIC-CP-DK"},
+ {2033, "EBCDIC-CP-NO"},
+ {2033, "csIBM277"},
+ {2034, "IBM278"},
+ {2034, "CP278"},
+ {2034, "csIBM278"},
+ {2034, "ebcdic-cp-fi"},
+ {2034, "ebcdic-cp-se"},
+ {2035, "IBM280"},
+ {2035, "CP280"},
+ {2035, "csIBM280"},
+ {2035, "ebcdic-cp-it"},
+ {2036, "IBM281"},
+ {2036, "EBCDIC-JP-E"},
+ {2036, "cp281"},
+ {2036, "csIBM281"},
+ {2037, "IBM284"},
+ {2037, "CP284"},
+ {2037, "csIBM284"},
+ {2037, "ebcdic-cp-es"},
+ {2038, "IBM285"},
+ {2038, "CP285"},
+ {2038, "csIBM285"},
+ {2038, "ebcdic-cp-gb"},
+ {2039, "IBM290"},
+ {2039, "EBCDIC-JP-kana"},
+ {2039, "cp290"},
+ {2039, "csIBM290"},
+ {2040, "IBM297"},
+ {2040, "cp297"},
+ {2040, "csIBM297"},
+ {2040, "ebcdic-cp-fr"},
+ {2041, "IBM420"},
+ {2041, "cp420"},
+ {2041, "csIBM420"},
+ {2041, "ebcdic-cp-ar1"},
+ {2042, "IBM423"},
+ {2042, "cp423"},
+ {2042, "csIBM423"},
+ {2042, "ebcdic-cp-gr"},
+ {2043, "IBM424"},
+ {2043, "cp424"},
+ {2043, "csIBM424"},
+ {2043, "ebcdic-cp-he"},
+ {2044, "IBM500"},
+ {2044, "CP500"},
+ {2044, "csIBM500"},
+ {2044, "ebcdic-cp-be"},
+ {2044, "ebcdic-cp-ch"},
+ {2045, "IBM851"},
+ {2045, "851"},
+ {2045, "cp851"},
+ {2045, "csIBM851"},
+ {2046, "IBM855"},
+ {2046, "855"},
+ {2046, "cp855"},
+ {2046, "csIBM855"},
+ {2047, "IBM857"},
+ {2047, "857"},
+ {2047, "cp857"},
+ {2047, "csIBM857"},
+ {2048, "IBM860"},
+ {2048, "860"},
+ {2048, "cp860"},
+ {2048, "csIBM860"},
+ {2049, "IBM861"},
+ {2049, "861"},
+ {2049, "cp-is"},
+ {2049, "cp861"},
+ {2049, "csIBM861"},
+ {2050, "IBM863"},
+ {2050, "863"},
+ {2050, "cp863"},
+ {2050, "csIBM863"},
+ {2051, "IBM864"},
+ {2051, "cp864"},
+ {2051, "csIBM864"},
+ {2052, "IBM865"},
+ {2052, "865"},
+ {2052, "cp865"},
+ {2052, "csIBM865"},
+ {2053, "IBM868"},
+ {2053, "CP868"},
+ {2053, "cp-ar"},
+ {2053, "csIBM868"},
+ {2054, "IBM869"},
+ {2054, "869"},
+ {2054, "cp-gr"},
+ {2054, "cp869"},
+ {2054, "csIBM869"},
+ {2055, "IBM870"},
+ {2055, "CP870"},
+ {2055, "csIBM870"},
+ {2055, "ebcdic-cp-roece"},
+ {2055, "ebcdic-cp-yu"},
+ {2056, "IBM871"},
+ {2056, "CP871"},
+ {2056, "csIBM871"},
+ {2056, "ebcdic-cp-is"},
+ {2057, "IBM880"},
+ {2057, "EBCDIC-Cyrillic"},
+ {2057, "cp880"},
+ {2057, "csIBM880"},
+ {2058, "IBM891"},
+ {2058, "cp891"},
+ {2058, "csIBM891"},
+ {2059, "IBM903"},
+ {2059, "cp903"},
+ {2059, "csIBM903"},
+ {2060, "IBM904"},
+ {2060, "904"},
+ {2060, "cp904"},
+ {2060, "csIBBM904"},
+ {2061, "IBM905"},
+ {2061, "CP905"},
+ {2061, "csIBM905"},
+ {2061, "ebcdic-cp-tr"},
+ {2062, "IBM918"},
+ {2062, "CP918"},
+ {2062, "csIBM918"},
+ {2062, "ebcdic-cp-ar2"},
+ {2063, "IBM1026"},
+ {2063, "CP1026"},
+ {2063, "csIBM1026"},
+ {2064, "EBCDIC-AT-DE"},
+ {2064, "csIBMEBCDICATDE"},
+ {2065, "EBCDIC-AT-DE-A"},
+ {2065, "csEBCDICATDEA"},
+ {2066, "EBCDIC-CA-FR"},
+ {2066, "csEBCDICCAFR"},
+ {2067, "EBCDIC-DK-NO"},
+ {2067, "csEBCDICDKNO"},
+ {2068, "EBCDIC-DK-NO-A"},
+ {2068, "csEBCDICDKNOA"},
+ {2069, "EBCDIC-FI-SE"},
+ {2069, "csEBCDICFISE"},
+ {2070, "EBCDIC-FI-SE-A"},
+ {2070, "csEBCDICFISEA"},
+ {2071, "EBCDIC-FR"},
+ {2071, "csEBCDICFR"},
+ {2072, "EBCDIC-IT"},
+ {2072, "csEBCDICIT"},
+ {2073, "EBCDIC-PT"},
+ {2073, "csEBCDICPT"},
+ {2074, "EBCDIC-ES"},
+ {2074, "csEBCDICES"},
+ {2075, "EBCDIC-ES-A"},
+ {2075, "csEBCDICESA"},
+ {2076, "EBCDIC-ES-S"},
+ {2076, "csEBCDICESS"},
+ {2077, "EBCDIC-UK"},
+ {2077, "csEBCDICUK"},
+ {2078, "EBCDIC-US"},
+ {2078, "csEBCDICUS"},
+ {2079, "UNKNOWN-8BIT"},
+ {2079, "csUnknown8BiT"},
+ {2080, "MNEMONIC"},
+ {2080, "csMnemonic"},
+ {2081, "MNEM"},
+ {2081, "csMnem"},
+ {2082, "VISCII"},
+ {2082, "csVISCII"},
+ {2083, "VIQR"},
+ {2083, "csVIQR"},
+ {2084, "KOI8-R"},
+ {2084, "csKOI8R"},
+ {2085, "HZ-GB-2312"},
+ {2086, "IBM866"},
+ {2086, "866"},
+ {2086, "cp866"},
+ {2086, "csIBM866"},
+ {2087, "IBM775"},
+ {2087, "cp775"},
+ {2087, "csPC775Baltic"},
+ {2088, "KOI8-U"},
+ {2088, "csKOI8U"},
+ {2089, "IBM00858"},
+ {2089, "CCSID00858"},
+ {2089, "CP00858"},
+ {2089, "PC-Multilingual-850+euro"},
+ {2089, "csIBM00858"},
+ {2090, "IBM00924"},
+ {2090, "CCSID00924"},
+ {2090, "CP00924"},
+ {2090, "csIBM00924"},
+ {2090, "ebcdic-Latin9--euro"},
+ {2091, "IBM01140"},
+ {2091, "CCSID01140"},
+ {2091, "CP01140"},
+ {2091, "csIBM01140"},
+ {2091, "ebcdic-us-37+euro"},
+ {2092, "IBM01141"},
+ {2092, "CCSID01141"},
+ {2092, "CP01141"},
+ {2092, "csIBM01141"},
+ {2092, "ebcdic-de-273+euro"},
+ {2093, "IBM01142"},
+ {2093, "CCSID01142"},
+ {2093, "CP01142"},
+ {2093, "csIBM01142"},
+ {2093, "ebcdic-dk-277+euro"},
+ {2093, "ebcdic-no-277+euro"},
+ {2094, "IBM01143"},
+ {2094, "CCSID01143"},
+ {2094, "CP01143"},
+ {2094, "csIBM01143"},
+ {2094, "ebcdic-fi-278+euro"},
+ {2094, "ebcdic-se-278+euro"},
+ {2095, "IBM01144"},
+ {2095, "CCSID01144"},
+ {2095, "CP01144"},
+ {2095, "csIBM01144"},
+ {2095, "ebcdic-it-280+euro"},
+ {2096, "IBM01145"},
+ {2096, "CCSID01145"},
+ {2096, "CP01145"},
+ {2096, "csIBM01145"},
+ {2096, "ebcdic-es-284+euro"},
+ {2097, "IBM01146"},
+ {2097, "CCSID01146"},
+ {2097, "CP01146"},
+ {2097, "csIBM01146"},
+ {2097, "ebcdic-gb-285+euro"},
+ {2098, "IBM01147"},
+ {2098, "CCSID01147"},
+ {2098, "CP01147"},
+ {2098, "csIBM01147"},
+ {2098, "ebcdic-fr-297+euro"},
+ {2099, "IBM01148"},
+ {2099, "CCSID01148"},
+ {2099, "CP01148"},
+ {2099, "csIBM01148"},
+ {2099, "ebcdic-international-500+euro"},
+ {2100, "IBM01149"},
+ {2100, "CCSID01149"},
+ {2100, "CP01149"},
+ {2100, "csIBM01149"},
+ {2100, "ebcdic-is-871+euro"},
+ {2101, "Big5-HKSCS"},
+ {2101, "csBig5HKSCS"},
+ {2102, "IBM1047"},
+ {2102, "IBM-1047"},
+ {2102, "csIBM1047"},
+ {2103, "PTCP154"},
+ {2103, "CP154"},
+ {2103, "Cyrillic-Asian"},
+ {2103, "PT154"},
+ {2103, "csPTCP154"},
+ {2104, "Amiga-1251"},
+ {2104, "Ami-1251"},
+ {2104, "Ami1251"},
+ {2104, "Amiga1251"},
+ {2104, "csAmiga1251"},
+ {2105, "KOI7-switched"},
+ {2105, "csKOI7switched"},
+ {2106, "BRF"},
+ {2106, "csBRF"},
+ {2107, "TSCII"},
+ {2107, "csTSCII"},
+ {2108, "CP51932"},
+ {2108, "csCP51932"},
+ {2109, "windows-874"},
+ {2109, "cswindows874"},
+ {2250, "windows-1250"},
+ {2250, "cswindows1250"},
+ {2251, "windows-1251"},
+ {2251, "cswindows1251"},
+ {2252, "windows-1252"},
+ {2252, "cswindows1252"},
+ {2253, "windows-1253"},
+ {2253, "cswindows1253"},
+ {2254, "windows-1254"},
+ {2254, "cswindows1254"},
+ {2255, "windows-1255"},
+ {2255, "cswindows1255"},
+ {2256, "windows-1256"},
+ {2256, "cswindows1256"},
+ {2257, "windows-1257"},
+ {2257, "cswindows1257"},
+ {2258, "windows-1258"},
+ {2258, "cswindows1258"},
+ {2259, "TIS-620"},
+ {2259, "ISO-8859-11"},
+ {2259, "csTIS620"},
+ {2260, "CP50220"},
+ {2260, "csCP50220"},
+ {0, nullptr} // sentinel
+ };
+
+ const __encoding_data* __encoding_rep_ = __text_encoding_data + 1;
+ char __name_[max_name_length + 1] = {0};
+};
+
+// [text.encoding.hash] hash support: a text_encoding is hashed through the
+// hash of its mib() enumerator.
+template <>
+struct hash<text_encoding> {
+  size_t operator()(const text_encoding& __enc) const noexcept {
+    const text_encoding::id __mib = __enc.mib();
+    return std::hash<text_encoding::id>{}(__mib);
+  }
+};
+
+namespace ranges {
+
+// Opts text_encoding::aliases_view into the borrowed_range concept.
+// NOTE(review): presumably safe because the view's iterators refer to the
+// static alias table rather than to the view object itself -- confirm against
+// the aliases_view definition.
+template <>
+inline constexpr bool enable_borrowed_range<text_encoding::aliases_view> = true;
+
+} // namespace ranges
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 26
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP_HAS_LOCALIZATION
+
+#endif // _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_H
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 7f625cefed1c2..a347538620f4d 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -2056,6 +2056,13 @@ module std [system] {
export *
}
+ // <text_encoding>: the public header, with the implementation header
+ // __text_encoding/text_encoding.h exposed as a nested submodule.
+ module text_encoding {
+ module text_encoding { header "__text_encoding/text_encoding.h" }
+
+ header "text_encoding"
+ export *
+ }
+
module thread {
module formatter { header "__thread/formatter.h" }
module id { header "__thread/id.h" }
diff --git a/libcxx/include/text_encoding b/libcxx/include/text_encoding
new file mode 100644
index 0000000000000..579608e4f939d
--- /dev/null
+++ b/libcxx/include/text_encoding
@@ -0,0 +1,68 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_TEXT_ENCODING
+#define _LIBCPP_TEXT_ENCODING
+
+/* text_encoding synopsis
+namespace std {
+
+struct text_encoding;
+
+// [text.encoding.hash], hash support
+template<class T> struct hash;
+template<> struct hash<text_encoding>;
+
+struct text_encoding
+{
+ static constexpr size_t max_name_length = 63;
+
+ // [text.encoding.id], enumeration text_encoding::id
+ enum class id : int_least32_t {
+ see below
+ };
+ using enum id;
+
+ constexpr text_encoding() = default;
+ constexpr explicit text_encoding(string_view enc) noexcept;
+ constexpr text_encoding(id i) noexcept;
+
+ constexpr id mib() const noexcept;
+ constexpr const char* name() const noexcept;
+
+ // [text.encoding.aliases], class text_encoding::aliases_view
+ struct aliases_view;
+ constexpr aliases_view aliases() const noexcept;
+
+ friend constexpr bool operator==(const text_encoding& a,
+ const text_encoding& b) noexcept;
+ friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept;
+
+ static consteval text_encoding literal() noexcept;
+ static text_encoding environment();
+ template<id i> static bool environment_is();
+
+ private:
+ id mib_ = id::unknown; // exposition only
+ char name_[max_name_length + 1] = {0}; // exposition only
+ static constexpr bool comp-name(string_view a, string_view b); // exposition only
+};
+}
+
+*/
+
+#include <__config>
+
+// Public standard library headers are marked as system headers so that
+// diagnostics originating inside them are suppressed in user code.
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+// The implementation is only provided from C++26 onwards.
+#if _LIBCPP_STD_VER >= 26
+#  include <__text_encoding/text_encoding.h>
+#endif // _LIBCPP_STD_VER >= 26
+
+#include <version>
+
+#endif // _LIBCPP_TEXT_ENCODING
diff --git a/libcxx/include/version b/libcxx/include/version
index 77d97b93adc6c..3079feac1ed54 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -592,7 +592,7 @@ __cpp_lib_void_t 201411L <type_traits>
# undef __cpp_lib_string_view
# define __cpp_lib_string_view 202403L
// # define __cpp_lib_submdspan 202306L
-// # define __cpp_lib_text_encoding 202306L
+# define __cpp_lib_text_encoding 202306L
# undef __cpp_lib_to_chars
// # define __cpp_lib_to_chars 202306L
// # define __cpp_lib_to_string 202306L
diff --git a/libcxx/modules/std.compat.cppm.in b/libcxx/modules/std.compat.cppm.in
index 95931447ccdc6..ce9b8bb5ec279 100644
--- a/libcxx/modules/std.compat.cppm.in
+++ b/libcxx/modules/std.compat.cppm.in
@@ -77,9 +77,6 @@ module;
# if __has_include(<stdfloat>)
# error "please update the header information for <stdfloat> in headers_not_available in utils/libcxx/header_information.py"
# endif // __has_include(<stdfloat>)
-# if __has_include(<text_encoding>)
-# error "please update the header information for <text_encoding> in headers_not_available in utils/libcxx/header_information.py"
-# endif // __has_include(<text_encoding>)
#endif // _WIN32
export module std.compat;
diff --git a/libcxx/modules/std.cppm.in b/libcxx/modules/std.cppm.in
index 5c523691bff4e..6f396e9ccdd90 100644
--- a/libcxx/modules/std.cppm.in
+++ b/libcxx/modules/std.cppm.in
@@ -138,6 +138,9 @@ module;
# include <syncstream>
#endif
#include <system_error>
+#if _LIBCPP_HAS_LOCALIZATION
+# include <text_encoding>
+#endif
#include <thread>
#include <tuple>
#include <type_traits>
@@ -187,9 +190,6 @@ module;
# if __has_include(<stdfloat>)
# error "please update the header information for <stdfloat> in headers_not_available in utils/libcxx/header_information.py"
# endif // __has_include(<stdfloat>)
-# if __has_include(<text_encoding>)
-# error "please update the header information for <text_encoding> in headers_not_available in utils/libcxx/header_information.py"
-# endif // __has_include(<text_encoding>)
#endif // _WIN32
export module std;
diff --git a/libcxx/modules/std/text_encoding.inc b/libcxx/modules/std/text_encoding.inc
index 6d5e3f1d68c60..23dd71965414c 100644
--- a/libcxx/modules/std/text_encoding.inc
+++ b/libcxx/modules/std/text_encoding.inc
@@ -8,12 +8,9 @@
//===----------------------------------------------------------------------===//
export namespace std {
-#if 0
-# if _LIBCPP_STD_VER >= 23
+#if _LIBCPP_STD_VER >= 26
+ using ::std::hash;
using std::text_encoding;
-
- // hash support
- using std::hash;
-# endif // _LIBCPP_STD_VER >= 23
+ using ::std::ranges::enable_borrowed_range;
#endif
} // namespace std
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 4e9bf900af4c5..1a98812894896 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -92,6 +92,7 @@ if (LIBCXX_ENABLE_LOCALIZATION)
ostream.cpp
regex.cpp
strstream.cpp
+ text_encoding.cpp
)
endif()
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index d981d137cf1ba..6d8dababb84ce 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -558,6 +558,19 @@ locale::locale(const locale& other, const locale& one, category c)
string locale::name() const { return __locale_->name(); }
+# if _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
+// Returns the text encoding associated with this locale, derived from the
+// locale's name():
+//   - "C"  -> text_encoding(id::ASCII)
+//   - "*"  -> default-constructed text_encoding
+//   - "language[_territory].codeset[@modifier]" -> text_encoding(codeset)
+//   - any other name is passed to text_encoding(string_view) unchanged
+// A candidate name longer than text_encoding::max_name_length yields a
+// default-constructed text_encoding instead of violating the constructor's
+// precondition.
+text_encoding locale::encoding() const {
+  const std::string __name = this->name();
+  if (__name == "C")
+    return std::text_encoding(text_encoding::id::ASCII);
+  if (__name == "*")
+    return std::text_encoding();
+
+  // Only the codeset component names an encoding: the full locale name
+  // (e.g. "en_US.UTF-8") never matches a registered encoding alias.
+  std::string_view __enc_name(__name);
+  const size_t __dot = __enc_name.find('.');
+  if (__dot != std::string_view::npos) {
+    __enc_name.remove_prefix(__dot + 1);
+    // Drop a trailing "@modifier", if any (find() returning npos keeps all).
+    __enc_name = __enc_name.substr(0, __enc_name.find('@'));
+  }
+
+  // text_encoding(string_view) requires size() <= max_name_length.
+  if (__enc_name.size() > text_encoding::max_name_length)
+    return std::text_encoding();
+  return std::text_encoding(__enc_name);
+}
+# endif // _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
+
void locale::__install_ctor(const locale& other, facet* f, long facet_id) {
if (f)
__locale_ = new __imp(*other.__locale_, f, facet_id);
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
new file mode 100644
index 0000000000000..42812aaed49c7
--- /dev/null
+++ b/libcxx/src/text_encoding.cpp
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <__config>
+
+// Note: no `#pragma GCC system_header` here. This is a main source file, where
+// the pragma is ignored and only produces a warning.
+
+#include <__locale_dir/locale_base_api.h>
+
+#include <text_encoding>
+
+// <langinfo.h> declares nl_langinfo_l() and CODESET on platforms that have it.
+#if __has_include(<langinfo.h>)
+#  include <langinfo.h>
+#endif
+
+#if _LIBCPP_STD_VER >= 26
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+#if __CHAR_BIT__ == 8
+
+// Returns the encoding named by the CODESET of the "" (environment-selected)
+// LC_CTYPE locale. The result is a default-constructed text_encoding when the
+// locale cannot be created, when no codeset is reported, or when the codeset
+// name is longer than max_name_length.
+text_encoding text_encoding::environment() {
+  text_encoding __enc{};
+
+  auto __loc = __locale::__newlocale(LC_CTYPE_MASK, "", static_cast<locale_t>(0));
+  if (!__loc)
+    return __enc;
+
+  if (const char* __codeset = nl_langinfo_l(CODESET, __loc)) {
+    const string_view __s(__codeset);
+    // A name of exactly max_name_length characters is still valid; only longer
+    // names violate the text_encoding(string_view) precondition.
+    if (__s.size() <= max_name_length)
+      __enc = text_encoding(__s);
+  }
+
+  // The temporary locale is only needed for the query above.
+  __locale::__freelocale(__loc);
+  return __enc;
+}
+
+# endif // __CHAR_BIT__ == 8
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_STD_VER >= 26
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index ce8f0261f2b27..24e57ab696857 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -1065,6 +1065,21 @@ system_error string
system_error string_view
system_error tuple
system_error version
+text_encoding cctype
+text_encoding clocale
+text_encoding compare
+text_encoding cstdint
+text_encoding cstdio
+text_encoding cstdlib
+text_encoding cstring
+text_encoding cwchar
+text_encoding cwctype
+text_encoding initializer_list
+text_encoding iosfwd
+text_encoding limits
+text_encoding stdexcept
+text_encoding string_view
+text_encoding version
thread array
thread atomic
thread bitset
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp
new file mode 100644
index 0000000000000..817b0f0d655db
--- /dev/null
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp
@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// WARNING: This test was generated by generate_feature_test_macro_components.py
+// and should not be edited manually.
+
+// <text_encoding>
+
+// Test the feature test macros defined by <text_encoding>
+
+// clang-format off
+
+#include <text_encoding>
+#include "test_macros.h"
+
+#if TEST_STD_VER < 14
+
+# ifdef __cpp_lib_text_encoding
+# error "__cpp_lib_text_encoding should not be defined before c++26"
+# endif
+
+#elif TEST_STD_VER == 14
+
+# ifdef __cpp_lib_text_encoding
+# error "__cpp_lib_text_encoding should not be defined before c++26"
+# endif
+
+#elif TEST_STD_VER == 17
+
+# ifdef __cpp_lib_text_encoding
+# error "__cpp_lib_text_encoding should not be defined before c++26"
+# endif
+
+#elif TEST_STD_VER == 20
+
+# ifdef __cpp_lib_text_encoding
+# error "__cpp_lib_text_encoding should not be defined before c++26"
+# endif
+
+#elif TEST_STD_VER == 23
+
+# ifdef __cpp_lib_text_encoding
+# error "__cpp_lib_text_encoding should not be defined before c++26"
+# endif
+
+#elif TEST_STD_VER > 23
+
+# ifndef __cpp_lib_text_encoding
+# error "__cpp_lib_text_encoding should be defined in c++26"
+# endif
+# if __cpp_lib_text_encoding != 202306L
+# error "__cpp_lib_text_encoding should have the value 202306L in c++26"
+# endif
+
+#endif // TEST_STD_VER > 23
+
+// clang-format on
+
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
index aa33a2788f1eb..ffcaeb4904b60 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp
@@ -7852,17 +7852,11 @@
# endif
# endif
-# if !defined(_LIBCPP_VERSION)
-# ifndef __cpp_lib_text_encoding
-# error "__cpp_lib_text_encoding should be defined in c++26"
-# endif
-# if __cpp_lib_text_encoding != 202306L
-# error "__cpp_lib_text_encoding should have the value 202306L in c++26"
-# endif
-# else
-# ifdef __cpp_lib_text_encoding
-# error "__cpp_lib_text_encoding should not be defined because it is unimplemented in libc++!"
-# endif
+# ifndef __cpp_lib_text_encoding
+# error "__cpp_lib_text_encoding should be defined in c++26"
+# endif
+# if __cpp_lib_text_encoding != 202306L
+# error "__cpp_lib_text_encoding should have the value 202306L in c++26"
# endif
# ifndef __cpp_lib_three_way_comparison
diff --git a/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
new file mode 100644
index 0000000000000..7ebfb5a0b4f74
--- /dev/null
+++ b/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
@@ -0,0 +1,56 @@
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// libc++ not built with C++26 yet
+// XFAIL: *
+// REQUIRES: std-at-least-c++26
+// REQUIRES: locale.en_US.UTF-8
+// UNSUPPORTED: no-localization
+
+// class locale
+
+// text_encoding encoding() const
+
+// Concerns:
+// 1. Default locale returns a text_encoding representing "ASCII"
+// 2. Locale built with en_US.UTF-8 returns text_encoding representing "UTF-8"
+
+#include <cassert>
+#include <locale>
+#include <text_encoding>
+
+#include "test_macros.h"
+#include "platform_support.h"
+
+using id = std::text_encoding::id;
+
+// Checks locale::encoding() for a default-constructed locale (expected to be
+// ASCII) and for the en_US.UTF-8 locale (expected to be UTF-8).
+int main(int, char**) {
+  {
+    std::locale loc;
+
+    auto te        = loc.encoding();
+    auto classicTE = std::text_encoding(id::ASCII);
+    assert(te == id::ASCII);
+    assert(te == classicTE);
+  }
+
+  {
+    std::locale utf8Locale(LOCALE_en_US_UTF_8);
+
+    auto te     = utf8Locale.encoding();
+    auto utf8TE = std::text_encoding(id::UTF8);
+    assert(te == id::UTF8);
+    assert(te == utf8TE);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/default.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/default.pass.cpp
new file mode 100644
index 0000000000000..62c30d7295491
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/default.pass.cpp
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// text_encoding::text_encoding() noexcept
+
+// Concerns:
+// 1. Default constructor must be nothrow
+// 2. Default constructing a text_encoding object makes it so that mib() == id::unknown, and its name is empty
+
+#include <cassert>
+#include <cstring>
+#include <text_encoding>
+#include <type_traits>
+
+// Checks that text_encoding is nothrow default constructible and that a
+// default-constructed object has mib() == id::unknown and an empty name.
+int main(int, char**) {
+  {
+    static_assert(
+        std::is_nothrow_default_constructible<std::text_encoding>::value, "Must be nothrow default constructible");
+  }
+
+  {
+    auto te = std::text_encoding();
+    assert(te.mib() == std::text_encoding::id::unknown);
+    // <cstring> only guarantees the std:: qualified name.
+    assert(std::strcmp(te.name(), "") == 0);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
new file mode 100644
index 0000000000000..6562971c73dbb
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
@@ -0,0 +1,56 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// text_encoding::text_encoding(id) noexcept
+
+// Concerns:
+// 1. text_encoding(id) must be nothrow
+// 2. Constructing an object with a valid id must set mib() and the name to the corresponding value.
+// 3. Constructing an object using id::unknown must set mib() to id::unknown and the name to an empty string.
+// 4. Constructing an object using id::other must set mib() to id::other and the name to an empty string.
+
+#include "test_text_encoding.h"
+#include <cassert>
+#include <string_view>
+#include <text_encoding>
+#include <type_traits>
+
+using te_id = std::text_encoding::id;
+
+// Constructs a text_encoding from `i` and asserts that it reports the expected
+// MIB value and the expected name.
+constexpr void test_ctor(te_id i, te_id expect_id, std::string_view expect_name) {
+  const std::text_encoding encoding(i);
+  assert(encoding.mib() == expect_id);
+  assert(expect_name == std::string_view(encoding.name()));
+}
+
+// Exercises text_encoding(id): nothrow constructibility, every entry of
+// unique_encoding_data, the special ids unknown/other, and id values past the
+// last registered one.
+int main(int, char**) {
+  {
+    static_assert(std::is_nothrow_constructible<std::text_encoding, std::text_encoding::id>::value,
+                  "Must be nothrow constructible with id");
+  }
+
+  {
+    for (auto pair : unique_encoding_data) {
+      test_ctor(te_id{pair.mib}, te_id{pair.mib}, pair.name);
+    }
+  }
+
+  { // id::unknown and id::other keep their id and have an empty name
+    test_ctor(te_id::unknown, te_id::unknown, "");
+    test_ctor(te_id::other, te_id::other, "");
+  }
+
+  {
+    // NOTE(review): these values are not enumerators of id, so this checks
+    // implementation behaviour rather than a standard guarantee -- confirm.
+    for (int i = 2261; i < 2300; i++) { // test out of range id values
+      test_ctor(te_id{i}, te_id::unknown, "");
+    }
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/string_view.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/string_view.pass.cpp
new file mode 100644
index 0000000000000..b72adf0274cb2
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/string_view.pass.cpp
@@ -0,0 +1,73 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// text_encoding::text_encoding(string_view) noexcept
+
+#include "test_macros.h"
+#include "test_text_encoding.h"
+#include <cassert>
+#include <cstring>
+#include <string_view>
+#include <text_encoding>
+#include <type_traits>
+
+// Constructs a text_encoding from the name `str` and asserts that it keeps the
+// expected spelling and resolves to the expected MIB value.
+constexpr void test_ctor(std::string_view str, std::string_view expect, std::text_encoding::id expect_id) {
+  const std::text_encoding encoding(str);
+  assert(encoding.mib() == expect_id);
+  assert(expect == std::string_view(encoding.name()));
+}
+
+// Every name in unique_encoding_data must construct an encoding that keeps that
+// name and maps to the MIB value listed next to it.
+void test_correct_encoding_spellings() {
+  for (auto entry : unique_encoding_data) {
+    const auto expected_id = std::text_encoding::id{entry.mib};
+    test_ctor(entry.name, entry.name, expected_id);
+  }
+}
+
+// Exercises text_encoding(string_view): nothrow constructibility, the exact
+// registered spellings, and several alternative spellings of the same names.
+int main(int, char**) {
+  {
+    static_assert(std::is_nothrow_constructible<std::text_encoding, std::string_view>::value,
+                  "Must be nothrow constructible with string_view");
+  }
+
+  // happy paths
+  {
+    test_correct_encoding_spellings();
+  }
+
+  // Spellings of UTF-8 with extra punctuation, no punctuation, or leading
+  // zeros still resolve to UTF8 and keep the spelling that was passed in.
+  {
+    test_ctor("U_T_F-8", "U_T_F-8", std::text_encoding::UTF8);
+  }
+
+  {
+    test_ctor("utf8", "utf8", std::text_encoding::UTF8);
+  }
+
+  {
+    test_ctor("u.t.f-008", "u.t.f-008", std::text_encoding::UTF8);
+  }
+
+  // A trailing zero changes the name, so it is not UTF-8 any more.
+  {
+    test_ctor("utf-80", "utf-80", std::text_encoding::other);
+  }
+
+  {
+    test_ctor("iso885931988", "iso885931988", std::text_encoding::ISOLatin3);
+  }
+
+  {
+    test_ctor("iso00885931988", "iso00885931988", std::text_encoding::ISOLatin3);
+  }
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
new file mode 100644
index 0000000000000..30ce1badec1d7
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
@@ -0,0 +1,69 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// bool operator==(const text_encoding&, id) noexcept
+
+// Concerns:
+// 1. operator==(const text_encoding&, id) must be noexcept
+// 2. operator==(const text_encoding&, id) returns true if mib() is equal to the id
+// 3. operator==(const text_encoding&, id) returns false if mib() is not equal to the id
+
+#include <cassert>
+#include <text_encoding>
+#include <type_traits>
+
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+using id = std::text_encoding::id;
+
+int main() {
+ // Exercises text_encoding == id, both at run time (assert) and in constant expressions (static_assert).
+ { // 1: comparing against an id must be noexcept
+ auto te = std::text_encoding();
+ ASSERT_NOEXCEPT(te == id::UTF8);
+ }
+
+ { // 2: an object built from id::UTF8 equals id::UTF8
+ auto te = std::text_encoding(id::UTF8);
+ assert(te == id::UTF8);
+ }
+
+ { // 2.0.1: a default-constructed object equals id::unknown (constant-evaluated)
+ constexpr auto te = std::text_encoding();
+ static_assert(te == id::unknown);
+ }
+
+ { // 2.1: an object built from id::other equals id::other
+ auto te = std::text_encoding(id::other);
+ assert(te == id::other);
+ }
+
+ { // 2.1.1: same as 2.1, constant-evaluated
+ constexpr auto te = std::text_encoding(id::other);
+ static_assert(te == id::other);
+ }
+
+ { // 3: an object built from id::UTF8 does not equal id::UTF16
+ auto te = std::text_encoding(id::UTF8);
+ assert(!(te == id::UTF16));
+ }
+
+ { // 3 (constant-evaluated variant)
+ constexpr auto te = std::text_encoding(id::UTF8);
+ static_assert(!(te == id::UTF16));
+ }
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
new file mode 100644
index 0000000000000..81fdb0a681143
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// bool operator==(const text_encoding&, const text_encoding&) noexcept
+
+// Concerns:
+// 1. operator==(const text_encoding&, const text_encoding&) must be noexcept
+// 2. operator==(const text_encoding&, const text_encoding&) returns true if both text_encoding ids are equal
+// 3. operator==(const text_encoding&, const text_encoding&) for text_encodings with ids of "other" returns true if the names are equal
+// 4. operator==(const text_encoding&, const text_encoding&) returns false when comparing text_encodings with different ids
+// 5. operator==(const text_encoding&, const text_encoding&) for text_encodings with ids of "other" returns false if the names are not equal
+
+#include <cassert>
+#include <text_encoding>
+#include <type_traits>
+
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+using id = std::text_encoding::id;
+
+int main(){
+ // Exercises text_encoding == text_encoding; block numbers match the "Concerns" list in the file header.
+ { // 1: comparing two text_encoding objects must be noexcept
+ auto te1 = std::text_encoding();
+ auto te2 = std::text_encoding();
+ ASSERT_NOEXCEPT(te1 == te2);
+ }
+
+ { // 2: two objects built from the same id compare equal
+ auto te1 = std::text_encoding(id::UTF8);
+ auto te2 = std::text_encoding(id::UTF8);
+ assert(te1 == te2);
+ }
+
+ { // 3: two objects built from the same unregistered name ("foo") compare equal
+ auto other_te1 = std::text_encoding("foo");
+ auto other_te2 = std::text_encoding("foo");
+ assert(other_te1 == other_te2);
+ }
+
+ { // 4: objects built from different ids compare unequal
+ auto te1 = std::text_encoding(id::UTF8);
+ auto te2 = std::text_encoding(id::UTF16);
+ assert(!(te1 == te2));
+ }
+
+ { // 5: objects built from different unregistered names ("foo" vs "bar") compare unequal
+ auto other_te1 = std::text_encoding("foo");
+ auto other_te2 = std::text_encoding("bar");
+ assert(!(other_te1 == other_te2));
+ }
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
new file mode 100644
index 0000000000000..345438d6a1ecb
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// aliases_view text_encoding::aliases() const noexcept;
+
+// Concerns:
+// 1. Verify that text_encoding::aliases_view satisfies ranges::forward_range, copyable, view,
+// ranges::random_access_range and ranges::borrowed_range
+
+#include <concepts>
+#include <ranges>
+#include <text_encoding>
+#include <type_traits>
+
+#include "platform_support.h"
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+int main(){ // compile-time only: aliases_view must model each concept asserted below
+ static_assert(std::ranges::forward_range<std::text_encoding::aliases_view>);
+ static_assert(std::copyable<std::text_encoding::aliases_view>);
+ static_assert(std::ranges::view<std::text_encoding::aliases_view>);
+ static_assert(std::ranges::random_access_range<std::text_encoding::aliases_view>);
+ static_assert(std::ranges::borrowed_range<std::text_encoding::aliases_view>);
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
new file mode 100644
index 0000000000000..c0412d4d39ee1
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
@@ -0,0 +1,83 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+// REQUIRES: locale.en_US.UTF-8
+
+// UNSUPPORTED: no-localization
+// UNSUPPORTED: windows
+
+// libc++ is not built with C++26, and the implementation for this function is in a source file.
+// XFAIL: *
+
+// class text_encoding
+
+// text_encoding text_encoding::environment();
+
+// Concerns:
+// 1. text_encoding::environment() returns the encoding for the "C" locale, which should be the default for any C++ program.
+// 2. text_encoding::environment() still returns the "C" locale encoding when the locale is set to "en_US.UTF-8".
+// 3. text_encoding::environment() is affected by changes to the "LANG" environment variable.
+
+// The current implementation of text_encoding::environment(), while conformant,
+// is unfortunately affected by changes to the "LANG" environment variable.
+
+#include <cassert>
+#include <clocale>
+#include <cstdlib>
+#include <string_view>
+#include <text_encoding>
+
+#include "platform_support.h"
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+int main() {
+ // Block numbers match the "Concerns" list in the file header; each block re-queries environment().
+ { // 1: with nothing changed, environment() is expected to report ASCII under the name "ANSI_X3.4-1968"
+ auto te = std::text_encoding::environment();
+
+ assert(te == std::text_encoding::environment());
+ assert(te.mib() == std::text_encoding::id::ASCII);
+ assert(te == std::text_encoding::id::ASCII);
+ assert(std::string_view(te.name()) == "ANSI_X3.4-1968");
+ assert(te == std::text_encoding("ANSI_X3.4-1968"));
+
+ assert(std::text_encoding::environment_is<std::text_encoding::id::ASCII>());
+ }
+
+ { // 2: the same ASCII result is expected after the C locale is switched to en_US.UTF-8
+ std::setlocale(LC_ALL, "en_US.UTF-8");
+
+ auto te = std::text_encoding::environment();
+
+ assert(te == std::text_encoding::environment());
+ assert(te.mib() == std::text_encoding::id::ASCII);
+ assert(std::string_view(te.name()) == "ANSI_X3.4-1968");
+ assert(te == std::text_encoding("ANSI_X3.4-1968"));
+
+ assert(std::text_encoding::environment_is<std::text_encoding::id::ASCII>());
+ }
+
+ { // 3: once LANG is overwritten, environment() is expected to report UTF-8 instead
+ setenv("LANG", LOCALE_en_US_UTF_8, 1);
+
+ auto te = std::text_encoding::environment();
+
+ assert(te == std::text_encoding::environment());
+ assert(te.mib() == std::text_encoding::id::UTF8);
+ assert(std::string_view(te.name()) == "UTF-8");
+ assert(te == std::text_encoding("UTF-8"));
+
+ assert(std::text_encoding::environment_is<std::text_encoding::id::UTF8>());
+ }
+
+ return 0;
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
new file mode 100644
index 0000000000000..200f9630de580
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
@@ -0,0 +1,49 @@
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// text_encoding text_encoding::literal() noexcept;
+
+// Concerns:
+// 1. text_encoding::literal() returns the proper encoding depending on the compiler, else unknown.
+
+#include <cassert>
+#include <text_encoding>
+#include <type_traits>
+#include <string_view>
+
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+int main() {
+#if __CHAR_BIT__ == 8
+ // Only checked when char is 8 bits wide; otherwise main() is empty.
+ {
+ auto te = std::text_encoding::literal();
+# ifdef __GNUC_EXECUTION_CHARSET_NAME
+ assert(std::string_view(te.name()) == std::string_view(__GNUC_EXECUTION_CHARSET_NAME));
+# elif defined(__clang_literal_encoding__)
+ assert(std::string_view(te.name()) == std::string_view(__clang_literal_encoding__));
+# elif defined(__clang__)
+ assert(std::string_view(te.name()) == "UTF-8");
+ assert(te.mib() == std::text_encoding::id::UTF8);
+# else // no compiler-provided charset name: literal() is expected to report unknown
+ assert(te.mib() == std::text_encoding::id::unknown);
+# endif
+ }
+
+#endif // if __CHAR_BIT__ == 8
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
new file mode 100644
index 0000000000000..61c148f854813
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// text_encoding::aliases_view::begin()
+
+// Concerns:
+// 1. Two aliases_views obtained from the same text_encoding object have equal begin().
+// 2. The aliases_views of two text_encoding objects that represent the same ID but hold different names have equal begin().
+// 3. The aliases_views of two text_encoding objects that represent different IDs have different begin().
+
+#include <cassert>
+#include <cstdlib>
+#include <text_encoding>
+#include <ranges>
+
+#include "platform_support.h"
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+using id = std::text_encoding::id;
+
+int main() {
+ // Compares std::ranges::begin() of aliases() views; blocks follow the "Concerns" list in the file header.
+ { // 1: two views taken from the same object start at the same position
+ auto te = std::text_encoding(id::UTF8);
+ auto view1 = te.aliases();
+ auto view2 = te.aliases();
+
+ assert(std::ranges::begin(view1) == std::ranges::begin(view2));
+ }
+
+ { // 2: two different spellings of the same encoding (both listed under mib 3 in all_encoding_data)
+ auto te1 = std::text_encoding("ANSI_X3.4-1968");
+ auto te2 = std::text_encoding("ANSI_X3.4-1986");
+
+ auto view1 = te1.aliases();
+ auto view2 = te2.aliases();
+
+ assert(std::ranges::begin(view1) == std::ranges::begin(view2));
+ }
+
+ { // 3: different encodings must not share a begin()
+
+ auto te1 = std::text_encoding(id::UTF8);
+ auto te2 = std::text_encoding(id::ASCII);
+
+ auto view1 = te1.aliases();
+ auto view2 = te2.aliases();
+
+ assert(!(std::ranges::begin(view1) == std::ranges::begin(view2)));
+ }
+
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
new file mode 100644
index 0000000000000..253a060dc0961
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
@@ -0,0 +1,64 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// text_encoding::aliases_view::empty()
+
+// Concerns:
+// 1. An aliases_view of a text_encoding object for "other" is empty
+// 2. An aliases_view of a text_encoding object for "unknown" is empty
+// 3. An aliases_view of a text_encoding object for a known encoding e.g. "UTF-8" is not empty
+
+#include <cassert>
+#include <cstdlib>
+#include <ranges>
+#include <text_encoding>
+
+#include "platform_support.h"
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+using id = std::text_encoding::id;
+
+int main(){
+ // Each block checks emptiness three ways: std::ranges::empty, member empty(), and conversion to bool.
+ { // 1: id::other is expected to have no aliases
+ auto te = std::text_encoding(id::other);
+ auto empty_range = te.aliases();
+
+ assert(std::ranges::empty(empty_range));
+ assert(empty_range.empty());
+ assert(!bool(empty_range));
+ }
+
+ { // 2: id::unknown is expected to have no aliases
+ auto te = std::text_encoding(id::unknown);
+ auto empty_range = te.aliases();
+
+ assert(std::ranges::empty(empty_range));
+ assert(empty_range.empty());
+ assert(!bool(empty_range));
+ }
+
+ { // 3: a registered encoding (UTF8) is expected to have at least one alias
+ auto te = std::text_encoding(id::UTF8);
+ auto range = te.aliases();
+
+ assert(!std::ranges::empty(range));
+ assert(!range.empty());
+ assert(bool(range));
+ }
+
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
new file mode 100644
index 0000000000000..fb9f80247b1ac
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// UNSUPPORTED: no-localization
+
+// class text_encoding
+
+// text_encoding::aliases_view::front()
+
+// Concerns:
+// 1. Two aliases_views obtained from the same text_encoding object return the same front()
+// 2. The aliases_views of two text_encoding objects that represent the same ID but hold different names return the same front()
+// 3. The aliases_views of two text_encoding objects that represent different IDs return different front()
+
+#include <cassert>
+#include <cstdlib>
+#include <text_encoding>
+
+#include "platform_support.h"
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+using id = std::text_encoding::id;
+
+int main() {
+ // NOTE(review): front() presumably yields const char*, so == below would compare pointers, not text - confirm.
+ { // 1: two views taken from the same object report the same front()
+ auto te = std::text_encoding(id::UTF8);
+
+ auto view1 = te.aliases();
+ auto view2 = te.aliases();
+
+ assert(view1.front() == view2.front());
+ }
+
+ { // 2: two different spellings of the same encoding (both listed under mib 3 in all_encoding_data)
+ auto te1 = std::text_encoding("ANSI_X3.4-1968");
+ auto te2 = std::text_encoding("ANSI_X3.4-1986");
+
+ auto view1 = te1.aliases();
+ auto view2 = te2.aliases();
+
+ assert(view1.front() == view2.front());
+ }
+
+ { // 3: different encodings must report different front() values
+
+ auto te1 = std::text_encoding(id::UTF8);
+ auto te2 = std::text_encoding(id::ASCII);
+
+ auto view1 = te1.aliases();
+ auto view2 = te2.aliases();
+
+ assert(!(view1.front() == view2.front()));
+ }
+
+}
diff --git a/libcxx/test/support/test_text_encoding.h b/libcxx/test/support/test_text_encoding.h
new file mode 100644
index 0000000000000..460a259722f0d
--- /dev/null
+++ b/libcxx/test/support/test_text_encoding.h
@@ -0,0 +1,1173 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SUPPORT_TEST_TEXT_ENCODING_H
+#define SUPPORT_TEST_TEXT_ENCODING_H
+
+#include "test_macros.h"
+#include <cstdint>
+
+struct encoding_pair { // one row of the test tables: a numeric id paired with one spelling
+ int_least32_t mib; // numeric id; rows for the same encoding repeat it (see all_encoding_data)
+ const char* name; // one name or alias for that id; "" in the rows for ids 1 and 2
+};
+
+constexpr encoding_pair all_encoding_data[] = {
+ {1, ""},
+ {2, ""},
+ {3, "ANSI_X3.4-1968"},
+ {3, "ANSI_X3.4-1986"},
+ {3, "IBM367"},
+ {3, "ISO646-US"},
+ {3, "ISO_646.irv:1991"},
+ {3, "cp367"},
+ {3, "csASCII"},
+ {3, "iso-ir-6"},
+ {3, "us"},
+ {4, "ISO-8859-1"},
+ {4, "ISO_8859-1:1987"},
+ {4, "CP819"},
+ {4, "IBM819"},
+ {4, "ISO_8859-1"},
+ {4, "csISOLatin1"},
+ {4, "iso-ir-100"},
+ {4, "l1"},
+ {4, "latin1"},
+ {5, "ISO-8859-2"},
+ {5, "ISO_8859-2:1987"},
+ {5, "ISO_8859-2"},
+ {5, "csISOLatin2"},
+ {5, "iso-ir-101"},
+ {5, "l2"},
+ {5, "latin2"},
+ {6, "ISO-8859-3"},
+ {6, "ISO_8859-3:1988"},
+ {6, "ISO_8859-3"},
+ {6, "csISOLatin3"},
+ {6, "iso-ir-109"},
+ {6, "l3"},
+ {6, "latin3"},
+ {7, "ISO-8859-4"},
+ {7, "ISO_8859-4:1988"},
+ {7, "ISO_8859-4"},
+ {7, "csISOLatin4"},
+ {7, "iso-ir-110"},
+ {7, "l4"},
+ {7, "latin4"},
+ {8, "ISO-8859-5"},
+ {8, "ISO_8859-5:1988"},
+ {8, "ISO_8859-5"},
+ {8, "csISOLatinCyrillic"},
+ {8, "cyrillic"},
+ {8, "iso-ir-144"},
+ {9, "ISO-8859-6"},
+ {9, "ISO_8859-6:1987"},
+ {9, "ASMO-708"},
+ {9, "ECMA-114"},
+ {9, "ISO_8859-6"},
+ {9, "arabic"},
+ {9, "csISOLatinArabic"},
+ {9, "iso-ir-127"},
+ {10, "ISO-8859-7"},
+ {10, "ISO_8859-7:1987"},
+ {10, "ECMA-118"},
+ {10, "ELOT_928"},
+ {10, "ISO_8859-7"},
+ {10, "csISOLatinGreek"},
+ {10, "greek"},
+ {10, "greek8"},
+ {10, "iso-ir-126"},
+ {11, "ISO-8859-8"},
+ {11, "ISO_8859-8:1988"},
+ {11, "ISO_8859-8"},
+ {11, "csISOLatinHebrew"},
+ {11, "hebrew"},
+ {11, "iso-ir-138"},
+ {12, "ISO-8859-9"},
+ {12, "ISO_8859-9:1989"},
+ {12, "ISO_8859-9"},
+ {12, "csISOLatin5"},
+ {12, "iso-ir-148"},
+ {12, "l5"},
+ {12, "latin5"},
+ {13, "ISO-8859-10"},
+ {13, "ISO_8859-10:1992"},
+ {13, "csISOLatin6"},
+ {13, "iso-ir-157"},
+ {13, "l6"},
+ {13, "latin6"},
+ {14, "ISO_6937-2-add"},
+ {14, "csISOTextComm"},
+ {14, "iso-ir-142"},
+ {15, "JIS_X0201"},
+ {15, "X0201"},
+ {15, "csHalfWidthKatakana"},
+ {16, "JIS_Encoding"},
+ {16, "csJISEncoding"},
+ {17, "Shift_JIS"},
+ {17, "MS_Kanji"},
+ {17, "csShiftJIS"},
+ {18, "EUC-JP"},
+ {18, "Extended_UNIX_Code_Packed_Format_for_Japanese"},
+ {18, "csEUCPkdFmtJapanese"},
+ {19, "Extended_UNIX_Code_Fixed_Width_for_Japanese"},
+ {19, "csEUCFixWidJapanese"},
+ {20, "BS_4730"},
+ {20, "ISO646-GB"},
+ {20, "csISO4UnitedKingdom"},
+ {20, "gb"},
+ {20, "iso-ir-4"},
+ {20, "uk"},
+ {21, "SEN_850200_C"},
+ {21, "ISO646-SE2"},
+ {21, "csISO11SwedishForNames"},
+ {21, "iso-ir-11"},
+ {21, "se2"},
+ {22, "IT"},
+ {22, "ISO646-IT"},
+ {22, "csISO15Italian"},
+ {22, "iso-ir-15"},
+ {23, "ES"},
+ {23, "ISO646-ES"},
+ {23, "csISO17Spanish"},
+ {23, "iso-ir-17"},
+ {24, "DIN_66003"},
+ {24, "ISO646-DE"},
+ {24, "csISO21German"},
+ {24, "de"},
+ {24, "iso-ir-21"},
+ {25, "NS_4551-1"},
+ {25, "ISO646-NO"},
+ {25, "csISO60DanishNorwegian"},
+ {25, "csISO60Norwegian1"},
+ {25, "iso-ir-60"},
+ {25, "no"},
+ {26, "NF_Z_62-010"},
+ {26, "ISO646-FR"},
+ {26, "csISO69French"},
+ {26, "fr"},
+ {26, "iso-ir-69"},
+ {27, "ISO-10646-UTF-1"},
+ {27, "csISO10646UTF1"},
+ {28, "ISO_646.basic:1983"},
+ {28, "csISO646basic1983"},
+ {28, "ref"},
+ {29, "INVARIANT"},
+ {29, "csINVARIANT"},
+ {30, "ISO_646.irv:1983"},
+ {30, "csISO2IntlRefVersion"},
+ {30, "irv"},
+ {30, "iso-ir-2"},
+ {31, "NATS-SEFI"},
+ {31, "csNATSSEFI"},
+ {31, "iso-ir-8-1"},
+ {32, "NATS-SEFI-ADD"},
+ {32, "csNATSSEFIADD"},
+ {32, "iso-ir-8-2"},
+ {33, "NATS-DANO"},
+ {33, "csNATSDANO"},
+ {33, "iso-ir-9-1"},
+ {34, "NATS-DANO-ADD"},
+ {34, "csNATSDANOADD"},
+ {34, "iso-ir-9-2"},
+ {35, "SEN_850200_B"},
+ {35, "FI"},
+ {35, "ISO646-FI"},
+ {35, "ISO646-SE"},
+ {35, "csISO10Swedish"},
+ {35, "iso-ir-10"},
+ {35, "se"},
+ {36, "KS_C_5601-1987"},
+ {36, "KSC_5601"},
+ {36, "KS_C_5601-1989"},
+ {36, "csKSC56011987"},
+ {36, "iso-ir-149"},
+ {36, "korean"},
+ {37, "ISO-2022-KR"},
+ {37, "csISO2022KR"},
+ {38, "EUC-KR"},
+ {38, "csEUCKR"},
+ {39, "ISO-2022-JP"},
+ {39, "csISO2022JP"},
+ {40, "ISO-2022-JP-2"},
+ {40, "csISO2022JP2"},
+ {41, "JIS_C6220-1969-jp"},
+ {41, "JIS_C6220-1969"},
+ {41, "csISO13JISC6220jp"},
+ {41, "iso-ir-13"},
+ {41, "katakana"},
+ {41, "x0201-7"},
+ {42, "JIS_C6220-1969-ro"},
+ {42, "ISO646-JP"},
+ {42, "csISO14JISC6220ro"},
+ {42, "iso-ir-14"},
+ {42, "jp"},
+ {43, "PT"},
+ {43, "ISO646-PT"},
+ {43, "csISO16Portuguese"},
+ {43, "iso-ir-16"},
+ {44, "greek7-old"},
+ {44, "csISO18Greek7Old"},
+ {44, "iso-ir-18"},
+ {45, "latin-greek"},
+ {45, "csISO19LatinGreek"},
+ {45, "iso-ir-19"},
+ {46, "NF_Z_62-010_(1973)"},
+ {46, "ISO646-FR1"},
+ {46, "csISO25French"},
+ {46, "iso-ir-25"},
+ {47, "Latin-greek-1"},
+ {47, "csISO27LatinGreek1"},
+ {47, "iso-ir-27"},
+ {48, "ISO_5427"},
+ {48, "csISO5427Cyrillic"},
+ {48, "iso-ir-37"},
+ {49, "JIS_C6226-1978"},
+ {49, "csISO42JISC62261978"},
+ {49, "iso-ir-42"},
+ {50, "BS_viewdata"},
+ {50, "csISO47BSViewdata"},
+ {50, "iso-ir-47"},
+ {51, "INIS"},
+ {51, "csISO49INIS"},
+ {51, "iso-ir-49"},
+ {52, "INIS-8"},
+ {52, "csISO50INIS8"},
+ {52, "iso-ir-50"},
+ {53, "INIS-cyrillic"},
+ {53, "csISO51INISCyrillic"},
+ {53, "iso-ir-51"},
+ {54, "ISO_5427:1981"},
+ {54, "ISO5427Cyrillic1981"},
+ {54, "csISO54271981"},
+ {54, "iso-ir-54"},
+ {55, "ISO_5428:1980"},
+ {55, "csISO5428Greek"},
+ {55, "iso-ir-55"},
+ {56, "GB_1988-80"},
+ {56, "ISO646-CN"},
+ {56, "cn"},
+ {56, "csISO57GB1988"},
+ {56, "iso-ir-57"},
+ {57, "GB_2312-80"},
+ {57, "chinese"},
+ {57, "csISO58GB231280"},
+ {57, "iso-ir-58"},
+ {58, "NS_4551-2"},
+ {58, "ISO646-NO2"},
+ {58, "csISO61Norwegian2"},
+ {58, "iso-ir-61"},
+ {58, "no2"},
+ {59, "videotex-suppl"},
+ {59, "csISO70VideotexSupp1"},
+ {59, "iso-ir-70"},
+ {60, "PT2"},
+ {60, "ISO646-PT2"},
+ {60, "csISO84Portuguese2"},
+ {60, "iso-ir-84"},
+ {61, "ES2"},
+ {61, "ISO646-ES2"},
+ {61, "csISO85Spanish2"},
+ {61, "iso-ir-85"},
+ {62, "MSZ_7795.3"},
+ {62, "ISO646-HU"},
+ {62, "csISO86Hungarian"},
+ {62, "hu"},
+ {62, "iso-ir-86"},
+ {63, "JIS_C6226-1983"},
+ {63, "JIS_X0208-1983"},
+ {63, "csISO87JISX0208"},
+ {63, "iso-ir-87"},
+ {63, "x0208"},
+ {64, "greek7"},
+ {64, "csISO88Greek7"},
+ {64, "iso-ir-88"},
+ {65, "ASMO_449"},
+ {65, "ISO_9036"},
+ {65, "arabic7"},
+ {65, "csISO89ASMO449"},
+ {65, "iso-ir-89"},
+ {66, "iso-ir-90"},
+ {66, "csISO90"},
+ {67, "JIS_C6229-1984-a"},
+ {67, "csISO91JISC62291984a"},
+ {67, "iso-ir-91"},
+ {67, "jp-ocr-a"},
+ {68, "JIS_C6229-1984-b"},
+ {68, "ISO646-JP-OCR-B"},
+ {68, "csISO92JISC62991984b"},
+ {68, "iso-ir-92"},
+ {68, "jp-ocr-b"},
+ {69, "JIS_C6229-1984-b-add"},
+ {69, "csISO93JIS62291984badd"},
+ {69, "iso-ir-93"},
+ {69, "jp-ocr-b-add"},
+ {70, "JIS_C6229-1984-hand"},
+ {70, "csISO94JIS62291984hand"},
+ {70, "iso-ir-94"},
+ {70, "jp-ocr-hand"},
+ {71, "JIS_C6229-1984-hand-add"},
+ {71, "csISO95JIS62291984handadd"},
+ {71, "iso-ir-95"},
+ {71, "jp-ocr-hand-add"},
+ {72, "JIS_C6229-1984-kana"},
+ {72, "csISO96JISC62291984kana"},
+ {72, "iso-ir-96"},
+ {73, "ISO_2033-1983"},
+ {73, "csISO2033"},
+ {73, "e13b"},
+ {73, "iso-ir-98"},
+ {74, "ANSI_X3.110-1983"},
+ {74, "CSA_T500-1983"},
+ {74, "NAPLPS"},
+ {74, "csISO99NAPLPS"},
+ {74, "iso-ir-99"},
+ {75, "T.61-7bit"},
+ {75, "csISO102T617bit"},
+ {75, "iso-ir-102"},
+ {76, "T.61-8bit"},
+ {76, "T.61"},
+ {76, "csISO103T618bit"},
+ {76, "iso-ir-103"},
+ {77, "ECMA-cyrillic"},
+ {77, "KOI8-E"},
+ {77, "csISO111ECMACyrillic"},
+ {77, "iso-ir-111"},
+ {78, "CSA_Z243.4-1985-1"},
+ {78, "ISO646-CA"},
+ {78, "ca"},
+ {78, "csISO121Canadian1"},
+ {78, "csa7-1"},
+ {78, "csa71"},
+ {78, "iso-ir-121"},
+ {79, "CSA_Z243.4-1985-2"},
+ {79, "ISO646-CA2"},
+ {79, "csISO122Canadian2"},
+ {79, "csa7-2"},
+ {79, "csa72"},
+ {79, "iso-ir-122"},
+ {80, "CSA_Z243.4-1985-gr"},
+ {80, "csISO123CSAZ24341985gr"},
+ {80, "iso-ir-123"},
+ {81, "ISO-8859-6-E"},
+ {81, "ISO_8859-6-E"},
+ {81, "csISO88596E"},
+ {82, "ISO-8859-6-I"},
+ {82, "ISO_8859-6-I"},
+ {82, "csISO88596I"},
+ {83, "T.101-G2"},
+ {83, "csISO128T101G2"},
+ {83, "iso-ir-128"},
+ {84, "ISO-8859-8-E"},
+ {84, "ISO_8859-8-E"},
+ {84, "csISO88598E"},
+ {85, "ISO-8859-8-I"},
+ {85, "ISO_8859-8-I"},
+ {85, "csISO88598I"},
+ {86, "CSN_369103"},
+ {86, "csISO139CSN369103"},
+ {86, "iso-ir-139"},
+ {87, "JUS_I.B1.002"},
+ {87, "ISO646-YU"},
+ {87, "csISO141JUSIB1002"},
+ {87, "iso-ir-141"},
+ {87, "js"},
+ {87, "yu"},
+ {88, "IEC_P27-1"},
+ {88, "csISO143IECP271"},
+ {88, "iso-ir-143"},
+ {89, "JUS_I.B1.003-serb"},
+ {89, "csISO146Serbian"},
+ {89, "iso-ir-146"},
+ {89, "serbian"},
+ {90, "JUS_I.B1.003-mac"},
+ {90, "csISO147Macedonian"},
+ {90, "iso-ir-147"},
+ {90, "macedonian"},
+ {91, "greek-ccitt"},
+ {91, "csISO150"},
+ {91, "csISO150GreekCCITT"},
+ {91, "iso-ir-150"},
+ {92, "NC_NC00-10:81"},
+ {92, "ISO646-CU"},
+ {92, "csISO151Cuba"},
+ {92, "cuba"},
+ {92, "iso-ir-151"},
+ {93, "ISO_6937-2-25"},
+ {93, "csISO6937Add"},
+ {93, "iso-ir-152"},
+ {94, "GOST_19768-74"},
+ {94, "ST_SEV_358-88"},
+ {94, "csISO153GOST1976874"},
+ {94, "iso-ir-153"},
+ {95, "ISO_8859-supp"},
+ {95, "csISO8859Supp"},
+ {95, "iso-ir-154"},
+ {95, "latin1-2-5"},
+ {96, "ISO_10367-box"},
+ {96, "csISO10367Box"},
+ {96, "iso-ir-155"},
+ {97, "latin-lap"},
+ {97, "csISO158Lap"},
+ {97, "iso-ir-158"},
+ {97, "lap"},
+ {98, "JIS_X0212-1990"},
+ {98, "csISO159JISX02121990"},
+ {98, "iso-ir-159"},
+ {98, "x0212"},
+ {99, "DS_2089"},
+ {99, "DS2089"},
+ {99, "ISO646-DK"},
+ {99, "csISO646Danish"},
+ {99, "dk"},
+ {100, "us-dk"},
+ {100, "csUSDK"},
+ {101, "dk-us"},
+ {101, "csDKUS"},
+ {102, "KSC5636"},
+ {102, "ISO646-KR"},
+ {102, "csKSC5636"},
+ {103, "UNICODE-1-1-UTF-7"},
+ {103, "csUnicode11UTF7"},
+ {104, "ISO-2022-CN"},
+ {104, "csISO2022CN"},
+ {105, "ISO-2022-CN-EXT"},
+ {105, "csISO2022CNEXT"},
+ {106, "UTF-8"},
+ {106, "csUTF8"},
+ {109, "ISO-8859-13"},
+ {109, "csISO885913"},
+ {110, "ISO-8859-14"},
+ {110, "ISO_8859-14"},
+ {110, "ISO_8859-14:1998"},
+ {110, "csISO885914"},
+ {110, "iso-celtic"},
+ {110, "iso-ir-199"},
+ {110, "l8"},
+ {110, "latin8"},
+ {111, "ISO-8859-15"},
+ {111, "ISO_8859-15"},
+ {111, "Latin-9"},
+ {111, "csISO885915"},
+ {112, "ISO-8859-16"},
+ {112, "ISO_8859-16"},
+ {112, "ISO_8859-16:2001"},
+ {112, "csISO885916"},
+ {112, "iso-ir-226"},
+ {112, "l10"},
+ {112, "latin10"},
+ {113, "GBK"},
+ {113, "CP936"},
+ {113, "MS936"},
+ {113, "csGBK"},
+ {113, "windows-936"},
+ {114, "GB18030"},
+ {114, "csGB18030"},
+ {115, "OSD_EBCDIC_DF04_15"},
+ {115, "csOSDEBCDICDF0415"},
+ {116, "OSD_EBCDIC_DF03_IRV"},
+ {116, "csOSDEBCDICDF03IRV"},
+ {117, "OSD_EBCDIC_DF04_1"},
+ {117, "csOSDEBCDICDF041"},
+ {118, "ISO-11548-1"},
+ {118, "ISO_11548-1"},
+ {118, "ISO_TR_11548-1"},
+ {118, "csISO115481"},
+ {119, "KZ-1048"},
+ {119, "RK1048"},
+ {119, "STRK1048-2002"},
+ {119, "csKZ1048"},
+ {1000, "ISO-10646-UCS-2"},
+ {1000, "csUnicode"},
+ {1001, "ISO-10646-UCS-4"},
+ {1001, "csUCS4"},
+ {1002, "ISO-10646-UCS-Basic"},
+ {1002, "csUnicodeASCII"},
+ {1003, "ISO-10646-Unicode-Latin1"},
+ {1003, "ISO-10646"},
+ {1003, "csUnicodeLatin1"},
+ {1004, "ISO-10646-J-1"},
+ {1004, "csUnicodeJapanese"},
+ {1005, "ISO-Unicode-IBM-1261"},
+ {1005, "csUnicodeIBM1261"},
+ {1006, "ISO-Unicode-IBM-1268"},
+ {1006, "csUnicodeIBM1268"},
+ {1007, "ISO-Unicode-IBM-1276"},
+ {1007, "csUnicodeIBM1276"},
+ {1008, "ISO-Unicode-IBM-1264"},
+ {1008, "csUnicodeIBM1264"},
+ {1009, "ISO-Unicode-IBM-1265"},
+ {1009, "csUnicodeIBM1265"},
+ {1010, "UNICODE-1-1"},
+ {1010, "csUnicode11"},
+ {1011, "SCSU"},
+ {1011, "csSCSU"},
+ {1012, "UTF-7"},
+ {1012, "csUTF7"},
+ {1013, "UTF-16BE"},
+ {1013, "csUTF16BE"},
+ {1014, "UTF-16LE"},
+ {1014, "csUTF16LE"},
+ {1015, "UTF-16"},
+ {1015, "csUTF16"},
+ {1016, "CESU-8"},
+ {1016, "csCESU-8"},
+ {1016, "csCESU8"},
+ {1017, "UTF-32"},
+ {1017, "csUTF32"},
+ {1018, "UTF-32BE"},
+ {1018, "csUTF32BE"},
+ {1019, "UTF-32LE"},
+ {1019, "csUTF32LE"},
+ {1020, "BOCU-1"},
+ {1020, "csBOCU-1"},
+ {1020, "csBOCU1"},
+ {1021, "UTF-7-IMAP"},
+ {1021, "csUTF7IMAP"},
+ {2000, "ISO-8859-1-Windows-3.0-Latin-1"},
+ {2000, "csWindows30Latin1"},
+ {2001, "ISO-8859-1-Windows-3.1-Latin-1"},
+ {2001, "csWindows31Latin1"},
+ {2002, "ISO-8859-2-Windows-Latin-2"},
+ {2002, "csWindows31Latin2"},
+ {2003, "ISO-8859-9-Windows-Latin-5"},
+ {2003, "csWindows31Latin5"},
+ {2004, "hp-roman8"},
+ {2004, "csHPRoman8"},
+ {2004, "r8"},
+ {2004, "roman8"},
+ {2005, "Adobe-Standard-Encoding"},
+ {2005, "csAdobeStandardEncoding"},
+ {2006, "Ventura-US"},
+ {2006, "csVenturaUS"},
+ {2007, "Ventura-International"},
+ {2007, "csVenturaInternational"},
+ {2008, "DEC-MCS"},
+ {2008, "csDECMCS"},
+ {2008, "dec"},
+ {2009, "IBM850"},
+ {2009, "850"},
+ {2009, "cp850"},
+ {2009, "csPC850Multilingual"},
+ {2010, "IBM852"},
+ {2010, "852"},
+ {2010, "cp852"},
+ {2010, "csPCp852"},
+ {2011, "IBM437"},
+ {2011, "437"},
+ {2011, "cp437"},
+ {2011, "csPC8CodePage437"},
+ {2012, "PC8-Danish-Norwegian"},
+ {2012, "csPC8DanishNorwegian"},
+ {2013, "IBM862"},
+ {2013, "862"},
+ {2013, "cp862"},
+ {2013, "csPC862LatinHebrew"},
+ {2014, "PC8-Turkish"},
+ {2014, "csPC8Turkish"},
+ {2015, "IBM-Symbols"},
+ {2015, "csIBMSymbols"},
+ {2016, "IBM-Thai"},
+ {2016, "csIBMThai"},
+ {2017, "HP-Legal"},
+ {2017, "csHPLegal"},
+ {2018, "HP-Pi-font"},
+ {2018, "csHPPiFont"},
+ {2019, "HP-Math8"},
+ {2019, "csHPMath8"},
+ {2020, "Adobe-Symbol-Encoding"},
+ {2020, "csHPPSMath"},
+ {2021, "HP-DeskTop"},
+ {2021, "csHPDesktop"},
+ {2022, "Ventura-Math"},
+ {2022, "csVenturaMath"},
+ {2023, "Microsoft-Publishing"},
+ {2023, "csMicrosoftPublishing"},
+ {2024, "Windows-31J"},
+ {2024, "csWindows31J"},
+ {2025, "GB2312"},
+ {2025, "csGB2312"},
+ {2026, "Big5"},
+ {2026, "csBig5"},
+ {2027, "macintosh"},
+ {2027, "csMacintosh"},
+ {2027, "mac"},
+ {2028, "IBM037"},
+ {2028, "cp037"},
+ {2028, "csIBM037"},
+ {2028, "ebcdic-cp-ca"},
+ {2028, "ebcdic-cp-nl"},
+ {2028, "ebcdic-cp-us"},
+ {2028, "ebcdic-cp-wt"},
+ {2029, "IBM038"},
+ {2029, "EBCDIC-INT"},
+ {2029, "cp038"},
+ {2029, "csIBM038"},
+ {2030, "IBM273"},
+ {2030, "CP273"},
+ {2030, "csIBM273"},
+ {2031, "IBM274"},
+ {2031, "CP274"},
+ {2031, "EBCDIC-BE"},
+ {2031, "csIBM274"},
+ {2032, "IBM275"},
+ {2032, "EBCDIC-BR"},
+ {2032, "cp275"},
+ {2032, "csIBM275"},
+ {2033, "IBM277"},
+ {2033, "EBCDIC-CP-DK"},
+ {2033, "EBCDIC-CP-NO"},
+ {2033, "csIBM277"},
+ {2034, "IBM278"},
+ {2034, "CP278"},
+ {2034, "csIBM278"},
+ {2034, "ebcdic-cp-fi"},
+ {2034, "ebcdic-cp-se"},
+ {2035, "IBM280"},
+ {2035, "CP280"},
+ {2035, "csIBM280"},
+ {2035, "ebcdic-cp-it"},
+ {2036, "IBM281"},
+ {2036, "EBCDIC-JP-E"},
+ {2036, "cp281"},
+ {2036, "csIBM281"},
+ {2037, "IBM284"},
+ {2037, "CP284"},
+ {2037, "csIBM284"},
+ {2037, "ebcdic-cp-es"},
+ {2038, "IBM285"},
+ {2038, "CP285"},
+ {2038, "csIBM285"},
+ {2038, "ebcdic-cp-gb"},
+ {2039, "IBM290"},
+ {2039, "EBCDIC-JP-kana"},
+ {2039, "cp290"},
+ {2039, "csIBM290"},
+ {2040, "IBM297"},
+ {2040, "cp297"},
+ {2040, "csIBM297"},
+ {2040, "ebcdic-cp-fr"},
+ {2041, "IBM420"},
+ {2041, "cp420"},
+ {2041, "csIBM420"},
+ {2041, "ebcdic-cp-ar1"},
+ {2042, "IBM423"},
+ {2042, "cp423"},
+ {2042, "csIBM423"},
+ {2042, "ebcdic-cp-gr"},
+ {2043, "IBM424"},
+ {2043, "cp424"},
+ {2043, "csIBM424"},
+ {2043, "ebcdic-cp-he"},
+ {2044, "IBM500"},
+ {2044, "CP500"},
+ {2044, "csIBM500"},
+ {2044, "ebcdic-cp-be"},
+ {2044, "ebcdic-cp-ch"},
+ {2045, "IBM851"},
+ {2045, "851"},
+ {2045, "cp851"},
+ {2045, "csIBM851"},
+ {2046, "IBM855"},
+ {2046, "855"},
+ {2046, "cp855"},
+ {2046, "csIBM855"},
+ {2047, "IBM857"},
+ {2047, "857"},
+ {2047, "cp857"},
+ {2047, "csIBM857"},
+ {2048, "IBM860"},
+ {2048, "860"},
+ {2048, "cp860"},
+ {2048, "csIBM860"},
+ {2049, "IBM861"},
+ {2049, "861"},
+ {2049, "cp-is"},
+ {2049, "cp861"},
+ {2049, "csIBM861"},
+ {2050, "IBM863"},
+ {2050, "863"},
+ {2050, "cp863"},
+ {2050, "csIBM863"},
+ {2051, "IBM864"},
+ {2051, "cp864"},
+ {2051, "csIBM864"},
+ {2052, "IBM865"},
+ {2052, "865"},
+ {2052, "cp865"},
+ {2052, "csIBM865"},
+ {2053, "IBM868"},
+ {2053, "CP868"},
+ {2053, "cp-ar"},
+ {2053, "csIBM868"},
+ {2054, "IBM869"},
+ {2054, "869"},
+ {2054, "cp-gr"},
+ {2054, "cp869"},
+ {2054, "csIBM869"},
+ {2055, "IBM870"},
+ {2055, "CP870"},
+ {2055, "csIBM870"},
+ {2055, "ebcdic-cp-roece"},
+ {2055, "ebcdic-cp-yu"},
+ {2056, "IBM871"},
+ {2056, "CP871"},
+ {2056, "csIBM871"},
+ {2056, "ebcdic-cp-is"},
+ {2057, "IBM880"},
+ {2057, "EBCDIC-Cyrillic"},
+ {2057, "cp880"},
+ {2057, "csIBM880"},
+ {2058, "IBM891"},
+ {2058, "cp891"},
+ {2058, "csIBM891"},
+ {2059, "IBM903"},
+ {2059, "cp903"},
+ {2059, "csIBM903"},
+ {2060, "IBM904"},
+ {2060, "904"},
+ {2060, "cp904"},
+ {2060, "csIBBM904"},
+ {2061, "IBM905"},
+ {2061, "CP905"},
+ {2061, "csIBM905"},
+ {2061, "ebcdic-cp-tr"},
+ {2062, "IBM918"},
+ {2062, "CP918"},
+ {2062, "csIBM918"},
+ {2062, "ebcdic-cp-ar2"},
+ {2063, "IBM1026"},
+ {2063, "CP1026"},
+ {2063, "csIBM1026"},
+ {2064, "EBCDIC-AT-DE"},
+ {2064, "csIBMEBCDICATDE"},
+ {2065, "EBCDIC-AT-DE-A"},
+ {2065, "csEBCDICATDEA"},
+ {2066, "EBCDIC-CA-FR"},
+ {2066, "csEBCDICCAFR"},
+ {2067, "EBCDIC-DK-NO"},
+ {2067, "csEBCDICDKNO"},
+ {2068, "EBCDIC-DK-NO-A"},
+ {2068, "csEBCDICDKNOA"},
+ {2069, "EBCDIC-FI-SE"},
+ {2069, "csEBCDICFISE"},
+ {2070, "EBCDIC-FI-SE-A"},
+ {2070, "csEBCDICFISEA"},
+ {2071, "EBCDIC-FR"},
+ {2071, "csEBCDICFR"},
+ {2072, "EBCDIC-IT"},
+ {2072, "csEBCDICIT"},
+ {2073, "EBCDIC-PT"},
+ {2073, "csEBCDICPT"},
+ {2074, "EBCDIC-ES"},
+ {2074, "csEBCDICES"},
+ {2075, "EBCDIC-ES-A"},
+ {2075, "csEBCDICESA"},
+ {2076, "EBCDIC-ES-S"},
+ {2076, "csEBCDICESS"},
+ {2077, "EBCDIC-UK"},
+ {2077, "csEBCDICUK"},
+ {2078, "EBCDIC-US"},
+ {2078, "csEBCDICUS"},
+ {2079, "UNKNOWN-8BIT"},
+ {2079, "csUnknown8BiT"},
+ {2080, "MNEMONIC"},
+ {2080, "csMnemonic"},
+ {2081, "MNEM"},
+ {2081, "csMnem"},
+ {2082, "VISCII"},
+ {2082, "csVISCII"},
+ {2083, "VIQR"},
+ {2083, "csVIQR"},
+ {2084, "KOI8-R"},
+ {2084, "csKOI8R"},
+ {2085, "HZ-GB-2312"},
+ {2086, "IBM866"},
+ {2086, "866"},
+ {2086, "cp866"},
+ {2086, "csIBM866"},
+ {2087, "IBM775"},
+ {2087, "cp775"},
+ {2087, "csPC775Baltic"},
+ {2088, "KOI8-U"},
+ {2088, "csKOI8U"},
+ {2089, "IBM00858"},
+ {2089, "CCSID00858"},
+ {2089, "CP00858"},
+ {2089, "PC-Multilingual-850+euro"},
+ {2089, "csIBM00858"},
+ {2090, "IBM00924"},
+ {2090, "CCSID00924"},
+ {2090, "CP00924"},
+ {2090, "csIBM00924"},
+ {2090, "ebcdic-Latin9--euro"},
+ {2091, "IBM01140"},
+ {2091, "CCSID01140"},
+ {2091, "CP01140"},
+ {2091, "csIBM01140"},
+ {2091, "ebcdic-us-37+euro"},
+ {2092, "IBM01141"},
+ {2092, "CCSID01141"},
+ {2092, "CP01141"},
+ {2092, "csIBM01141"},
+ {2092, "ebcdic-de-273+euro"},
+ {2093, "IBM01142"},
+ {2093, "CCSID01142"},
+ {2093, "CP01142"},
+ {2093, "csIBM01142"},
+ {2093, "ebcdic-dk-277+euro"},
+ {2093, "ebcdic-no-277+euro"},
+ {2094, "IBM01143"},
+ {2094, "CCSID01143"},
+ {2094, "CP01143"},
+ {2094, "csIBM01143"},
+ {2094, "ebcdic-fi-278+euro"},
+ {2094, "ebcdic-se-278+euro"},
+ {2095, "IBM01144"},
+ {2095, "CCSID01144"},
+ {2095, "CP01144"},
+ {2095, "csIBM01144"},
+ {2095, "ebcdic-it-280+euro"},
+ {2096, "IBM01145"},
+ {2096, "CCSID01145"},
+ {2096, "CP01145"},
+ {2096, "csIBM01145"},
+ {2096, "ebcdic-es-284+euro"},
+ {2097, "IBM01146"},
+ {2097, "CCSID01146"},
+ {2097, "CP01146"},
+ {2097, "csIBM01146"},
+ {2097, "ebcdic-gb-285+euro"},
+ {2098, "IBM01147"},
+ {2098, "CCSID01147"},
+ {2098, "CP01147"},
+ {2098, "csIBM01147"},
+ {2098, "ebcdic-fr-297+euro"},
+ {2099, "IBM01148"},
+ {2099, "CCSID01148"},
+ {2099, "CP01148"},
+ {2099, "csIBM01148"},
+ {2099, "ebcdic-international-500+euro"},
+ {2100, "IBM01149"},
+ {2100, "CCSID01149"},
+ {2100, "CP01149"},
+ {2100, "csIBM01149"},
+ {2100, "ebcdic-is-871+euro"},
+ {2101, "Big5-HKSCS"},
+ {2101, "csBig5HKSCS"},
+ {2102, "IBM1047"},
+ {2102, "IBM-1047"},
+ {2102, "csIBM1047"},
+ {2103, "PTCP154"},
+ {2103, "CP154"},
+ {2103, "Cyrillic-Asian"},
+ {2103, "PT154"},
+ {2103, "csPTCP154"},
+ {2104, "Amiga-1251"},
+ {2104, "Ami-1251"},
+ {2104, "Ami1251"},
+ {2104, "Amiga1251"},
+ {2104, "csAmiga1251"},
+ {2105, "KOI7-switched"},
+ {2105, "csKOI7switched"},
+ {2106, "BRF"},
+ {2106, "csBRF"},
+ {2107, "TSCII"},
+ {2107, "csTSCII"},
+ {2108, "CP51932"},
+ {2108, "csCP51932"},
+ {2109, "windows-874"},
+ {2109, "cswindows874"},
+ {2250, "windows-1250"},
+ {2250, "cswindows1250"},
+ {2251, "windows-1251"},
+ {2251, "cswindows1251"},
+ {2252, "windows-1252"},
+ {2252, "cswindows1252"},
+ {2253, "windows-1253"},
+ {2253, "cswindows1253"},
+ {2254, "windows-1254"},
+ {2254, "cswindows1254"},
+ {2255, "windows-1255"},
+ {2255, "cswindows1255"},
+ {2256, "windows-1256"},
+ {2256, "cswindows1256"},
+ {2257, "windows-1257"},
+ {2257, "cswindows1257"},
+ {2258, "windows-1258"},
+ {2258, "cswindows1258"},
+ {2259, "TIS-620"},
+ {2259, "ISO-8859-11"},
+ {2259, "csTIS620"},
+ {2260, "CP50220"},
+ {2260, "csCP50220"},
+ {0, nullptr} // sentinel
+};
+
+constexpr encoding_pair unique_encoding_data[] = {
+ {3, "ANSI_X3.4-1968"},
+ {4, "ISO-8859-1"},
+ {5, "ISO-8859-2"},
+ {6, "ISO-8859-3"},
+ {7, "ISO-8859-4"},
+ {8, "ISO-8859-5"},
+ {9, "ISO-8859-6"},
+ {10, "ISO-8859-7"},
+ {11, "ISO-8859-8"},
+ {12, "ISO-8859-9"},
+ {13, "ISO-8859-10"},
+ {14, "ISO_6937-2-add"},
+ {15, "JIS_X0201"},
+ {16, "JIS_Encoding"},
+ {17, "Shift_JIS"},
+ {18, "EUC-JP"},
+ {19, "Extended_UNIX_Code_Fixed_Width_for_Japanese"},
+ {20, "BS_4730"},
+ {21, "SEN_850200_C"},
+ {22, "IT"},
+ {23, "ES"},
+ {24, "DIN_66003"},
+ {25, "NS_4551-1"},
+ {26, "NF_Z_62-010"},
+ {27, "ISO-10646-UTF-1"},
+ {28, "ISO_646.basic:1983"},
+ {29, "INVARIANT"},
+ {30, "ISO_646.irv:1983"},
+ {31, "NATS-SEFI"},
+ {32, "NATS-SEFI-ADD"},
+ {33, "NATS-DANO"},
+ {34, "NATS-DANO-ADD"},
+ {35, "SEN_850200_B"},
+ {36, "KS_C_5601-1987"},
+ {37, "ISO-2022-KR"},
+ {38, "EUC-KR"},
+ {39, "ISO-2022-JP"},
+ {40, "ISO-2022-JP-2"},
+ {41, "JIS_C6220-1969-jp"},
+ {42, "JIS_C6220-1969-ro"},
+ {43, "PT"},
+ {44, "greek7-old"},
+ {45, "latin-greek"},
+ {46, "NF_Z_62-010_(1973)"},
+ {47, "Latin-greek-1"},
+ {48, "ISO_5427"},
+ {49, "JIS_C6226-1978"},
+ {50, "BS_viewdata"},
+ {51, "INIS"},
+ {52, "INIS-8"},
+ {53, "INIS-cyrillic"},
+ {54, "ISO_5427:1981"},
+ {55, "ISO_5428:1980"},
+ {56, "GB_1988-80"},
+ {57, "GB_2312-80"},
+ {58, "NS_4551-2"},
+ {59, "videotex-suppl"},
+ {60, "PT2"},
+ {61, "ES2"},
+ {62, "MSZ_7795.3"},
+ {63, "JIS_C6226-1983"},
+ {64, "greek7"},
+ {65, "ASMO_449"},
+ {66, "iso-ir-90"},
+ {67, "JIS_C6229-1984-a"},
+ {68, "JIS_C6229-1984-b"},
+ {69, "JIS_C6229-1984-b-add"},
+ {70, "JIS_C6229-1984-hand"},
+ {71, "JIS_C6229-1984-hand-add"},
+ {72, "JIS_C6229-1984-kana"},
+ {73, "ISO_2033-1983"},
+ {74, "ANSI_X3.110-1983"},
+ {75, "T.61-7bit"},
+ {76, "T.61-8bit"},
+ {77, "ECMA-cyrillic"},
+ {78, "CSA_Z243.4-1985-1"},
+ {79, "CSA_Z243.4-1985-2"},
+ {80, "CSA_Z243.4-1985-gr"},
+ {81, "ISO-8859-6-E"},
+ {82, "ISO-8859-6-I"},
+ {83, "T.101-G2"},
+ {84, "ISO-8859-8-E"},
+ {85, "ISO-8859-8-I"},
+ {86, "CSN_369103"},
+ {87, "JUS_I.B1.002"},
+ {88, "IEC_P27-1"},
+ {89, "JUS_I.B1.003-serb"},
+ {90, "JUS_I.B1.003-mac"},
+ {91, "greek-ccitt"},
+ {92, "NC_NC00-10:81"},
+ {93, "ISO_6937-2-25"},
+ {94, "GOST_19768-74"},
+ {95, "ISO_8859-supp"},
+ {96, "ISO_10367-box"},
+ {97, "latin-lap"},
+ {98, "JIS_X0212-1990"},
+ {99, "DS_2089"},
+ {100, "us-dk"},
+ {101, "dk-us"},
+ {102, "KSC5636"},
+ {103, "UNICODE-1-1-UTF-7"},
+ {104, "ISO-2022-CN"},
+ {105, "ISO-2022-CN-EXT"},
+ {106, "UTF-8"},
+ {109, "ISO-8859-13"},
+ {110, "ISO-8859-14"},
+ {111, "ISO-8859-15"},
+ {112, "ISO-8859-16"},
+ {113, "GBK"},
+ {114, "GB18030"},
+ {115, "OSD_EBCDIC_DF04_15"},
+ {116, "OSD_EBCDIC_DF03_IRV"},
+ {117, "OSD_EBCDIC_DF04_1"},
+ {118, "ISO-11548-1"},
+ {119, "KZ-1048"},
+ {1000, "ISO-10646-UCS-2"},
+ {1001, "ISO-10646-UCS-4"},
+ {1002, "ISO-10646-UCS-Basic"},
+ {1003, "ISO-10646-Unicode-Latin1"},
+ {1004, "ISO-10646-J-1"},
+ {1005, "ISO-Unicode-IBM-1261"},
+ {1006, "ISO-Unicode-IBM-1268"},
+ {1007, "ISO-Unicode-IBM-1276"},
+ {1008, "ISO-Unicode-IBM-1264"},
+ {1009, "ISO-Unicode-IBM-1265"},
+ {1010, "UNICODE-1-1"},
+ {1011, "SCSU"},
+ {1012, "UTF-7"},
+ {1013, "UTF-16BE"},
+ {1014, "UTF-16LE"},
+ {1015, "UTF-16"},
+ {1016, "CESU-8"},
+ {1017, "UTF-32"},
+ {1018, "UTF-32BE"},
+ {1019, "UTF-32LE"},
+ {1020, "BOCU-1"},
+ {1021, "UTF-7-IMAP"},
+ {2000, "ISO-8859-1-Windows-3.0-Latin-1"},
+ {2001, "ISO-8859-1-Windows-3.1-Latin-1"},
+ {2002, "ISO-8859-2-Windows-Latin-2"},
+ {2003, "ISO-8859-9-Windows-Latin-5"},
+ {2004, "hp-roman8"},
+ {2005, "Adobe-Standard-Encoding"},
+ {2006, "Ventura-US"},
+ {2007, "Ventura-International"},
+ {2008, "DEC-MCS"},
+ {2009, "IBM850"},
+ {2010, "IBM852"},
+ {2011, "IBM437"},
+ {2012, "PC8-Danish-Norwegian"},
+ {2013, "IBM862"},
+ {2014, "PC8-Turkish"},
+ {2015, "IBM-Symbols"},
+ {2016, "IBM-Thai"},
+ {2017, "HP-Legal"},
+ {2018, "HP-Pi-font"},
+ {2019, "HP-Math8"},
+ {2020, "Adobe-Symbol-Encoding"},
+ {2021, "HP-DeskTop"},
+ {2022, "Ventura-Math"},
+ {2023, "Microsoft-Publishing"},
+ {2024, "Windows-31J"},
+ {2025, "GB2312"},
+ {2026, "Big5"},
+ {2027, "macintosh"},
+ {2028, "IBM037"},
+ {2029, "IBM038"},
+ {2030, "IBM273"},
+ {2031, "IBM274"},
+ {2032, "IBM275"},
+ {2033, "IBM277"},
+ {2034, "IBM278"},
+ {2035, "IBM280"},
+ {2036, "IBM281"},
+ {2037, "IBM284"},
+ {2038, "IBM285"},
+ {2039, "IBM290"},
+ {2040, "IBM297"},
+ {2041, "IBM420"},
+ {2042, "IBM423"},
+ {2043, "IBM424"},
+ {2044, "IBM500"},
+ {2045, "IBM851"},
+ {2046, "IBM855"},
+ {2047, "IBM857"},
+ {2048, "IBM860"},
+ {2049, "IBM861"},
+ {2050, "IBM863"},
+ {2051, "IBM864"},
+ {2052, "IBM865"},
+ {2053, "IBM868"},
+ {2054, "IBM869"},
+ {2055, "IBM870"},
+ {2056, "IBM871"},
+ {2057, "IBM880"},
+ {2058, "IBM891"},
+ {2059, "IBM903"},
+ {2060, "IBM904"},
+ {2061, "IBM905"},
+ {2062, "IBM918"},
+ {2063, "IBM1026"},
+ {2064, "EBCDIC-AT-DE"},
+ {2065, "EBCDIC-AT-DE-A"},
+ {2066, "EBCDIC-CA-FR"},
+ {2067, "EBCDIC-DK-NO"},
+ {2068, "EBCDIC-DK-NO-A"},
+ {2069, "EBCDIC-FI-SE"},
+ {2070, "EBCDIC-FI-SE-A"},
+ {2071, "EBCDIC-FR"},
+ {2072, "EBCDIC-IT"},
+ {2073, "EBCDIC-PT"},
+ {2074, "EBCDIC-ES"},
+ {2075, "EBCDIC-ES-A"},
+ {2076, "EBCDIC-ES-S"},
+ {2077, "EBCDIC-UK"},
+ {2078, "EBCDIC-US"},
+ {2079, "UNKNOWN-8BIT"},
+ {2080, "MNEMONIC"},
+ {2081, "MNEM"},
+ {2082, "VISCII"},
+ {2083, "VIQR"},
+ {2084, "KOI8-R"},
+ {2085, "HZ-GB-2312"},
+ {2086, "IBM866"},
+ {2087, "IBM775"},
+ {2088, "KOI8-U"},
+ {2089, "IBM00858"},
+ {2090, "IBM00924"},
+ {2091, "IBM01140"},
+ {2092, "IBM01141"},
+ {2093, "IBM01142"},
+ {2094, "IBM01143"},
+ {2095, "IBM01144"},
+ {2096, "IBM01145"},
+ {2097, "IBM01146"},
+ {2098, "IBM01147"},
+ {2099, "IBM01148"},
+ {2100, "IBM01149"},
+ {2101, "Big5-HKSCS"},
+ {2102, "IBM1047"},
+ {2103, "PTCP154"},
+ {2104, "Amiga-1251"},
+ {2105, "KOI7-switched"},
+ {2106, "BRF"},
+ {2107, "TSCII"},
+ {2108, "CP51932"},
+ {2109, "windows-874"},
+ {2250, "windows-1250"},
+ {2251, "windows-1251"},
+ {2252, "windows-1252"},
+ {2253, "windows-1253"},
+ {2254, "windows-1254"},
+ {2255, "windows-1255"},
+ {2256, "windows-1256"},
+ {2257, "windows-1257"},
+ {2258, "windows-1258"},
+ {2259, "TIS-620"},
+ {2260, "CP50220"},
+};
+
+#endif // SUPPORT_TEST_TEXT_ENCODING_H
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index 2b7f6fa8a48a9..fc3294c4bba76 100755
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -1346,7 +1346,6 @@ def add_version_header(tc):
"c++26": 202306 # P1885R12 Naming Text Encodings to Demystify Them
},
"headers": ["text_encoding"],
- "unimplemented": True,
},
{
"name": "__cpp_lib_three_way_comparison",
diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py
index a505d37b65b81..55b01f9eae44e 100644
--- a/libcxx/utils/libcxx/header_information.py
+++ b/libcxx/utils/libcxx/header_information.py
@@ -172,7 +172,6 @@ def __hash__(self) -> int:
"spanstream",
"stacktrace",
"stdfloat",
- "text_encoding",
]))
header_restrictions = {
@@ -196,6 +195,7 @@ def __hash__(self) -> int:
"streambuf": "_LIBCPP_HAS_LOCALIZATION",
"strstream": "_LIBCPP_HAS_LOCALIZATION",
"syncstream": "_LIBCPP_HAS_LOCALIZATION",
+ "text_encoding": "_LIBCPP_HAS_LOCALIZATION",
}
lit_header_restrictions = {
@@ -231,6 +231,7 @@ def __hash__(self) -> int:
"streambuf": "// UNSUPPORTED: no-localization",
"strstream": "// UNSUPPORTED: no-localization",
"syncstream": "// UNSUPPORTED: no-localization",
+ "text_encoding": "// UNSUPPORTED: no-localization",
"thread": "// UNSUPPORTED: no-threads, c++03",
"wchar.h": "// UNSUPPORTED: no-wide-characters",
"wctype.h": "// UNSUPPORTED: no-wide-characters",
>From 46758a78a3d4293911a90b5468c61258490ad4bf Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Fri, 23 May 2025 22:58:03 -0400
Subject: [PATCH 02/54] Clang-Format
---
libcxx/include/__locale | 4 +-
.../include/__text_encoding/text_encoding.h | 47 +++++++++----------
libcxx/include/text_encoding | 10 ++--
libcxx/src/locale.cpp | 22 ++++-----
libcxx/src/text_encoding.cpp | 36 +++++++-------
.../text_encoding.ctor/id.pass.cpp | 6 +--
.../text_encoding.eq/equal.id.pass.cpp | 3 +-
.../text_encoding.eq/equal.pass.cpp | 5 +-
.../text_encoding.members/aliases.pass.cpp | 8 ++--
.../environment.pass.cpp | 17 ++++---
.../text_encoding.members/literal.pass.cpp | 2 +-
.../text_encoding.aliases_view/begin.pass.cpp | 19 ++++----
.../text_encoding.aliases_view/empty.pass.cpp | 16 +++----
.../text_encoding.aliases_view/front.pass.cpp | 17 +++----
14 files changed, 100 insertions(+), 112 deletions(-)
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 4da3f38ac408f..6f6091d08026f 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -103,9 +103,9 @@ public:
// locale operations:
string name() const;
-
+
# if _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
- text_encoding encoding() const;
+ text_encoding encoding() const;
# endif // _LIBCPP_STD_VER >= 26
bool operator==(const locale&) const;
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 93d0ae2ab6b89..5edc631f7acfd 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -18,22 +18,22 @@
#if _LIBCPP_HAS_LOCALIZATION
-#include <__algorithm/copy_n.h>
-#include <__algorithm/lower_bound.h>
-#include <__algorithm/min.h>
-#include <__functional/hash.h>
-#include <__iterator/iterator_traits.h>
-#include <__locale_dir/locale_base_api.h>
-#include <__ranges/view_interface.h>
-#include <__string/char_traits.h>
-#include <__utility/unreachable.h>
-#include <cstdint>
-#include <string_view>
+# include <__algorithm/copy_n.h>
+# include <__algorithm/lower_bound.h>
+# include <__algorithm/min.h>
+# include <__functional/hash.h>
+# include <__iterator/iterator_traits.h>
+# include <__locale_dir/locale_base_api.h>
+# include <__ranges/view_interface.h>
+# include <__string/char_traits.h>
+# include <__utility/unreachable.h>
+# include <cstdint>
+# include <string_view>
_LIBCPP_PUSH_MACROS
-#include <__undef_macros>
+# include <__undef_macros>
-#if _LIBCPP_STD_VER >= 26
+# if _LIBCPP_STD_VER >= 26
_LIBCPP_BEGIN_NAMESPACE_STD
struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
@@ -340,12 +340,12 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
constexpr aliases_view(const __encoding_data* __d) : __view_data_(__d) {}
struct __end_sentinel {};
struct __iterator {
- using value_type = const char*;
- using reference = const char*;
- using difference_type = ptrdiff_t;
+ using value_type = const char*;
+ using reference = const char*;
+ using difference_type = ptrdiff_t;
+
+ _LIBCPP_HIDE_FROM_ABI constexpr __iterator() noexcept = default;
- _LIBCPP_HIDE_FROM_ABI constexpr __iterator() noexcept = default;
-
_LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const {
if (__can_dereference())
return __data_->__name;
@@ -368,19 +368,18 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
}
_LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __it, difference_type __n) {
- __it -= __n;
+ __it -= __n;
return __it;
}
- _LIBCPP_HIDE_FROM_ABI constexpr difference_type operator-(const __iterator& __other) const
- {
- if(__other.__mib_rep_ == __mib_rep_)
+ _LIBCPP_HIDE_FROM_ABI constexpr difference_type operator-(const __iterator& __other) const {
+ if (__other.__mib_rep_ == __mib_rep_)
return __mib_rep_ - __other.__mib_rep_;
std::unreachable();
}
_LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(difference_type __n, __iterator& __it) {
- __it -= __n;
+ __it -= __n;
return __it;
}
@@ -1474,7 +1473,7 @@ inline constexpr bool enable_borrowed_range<text_encoding::aliases_view> = true;
_LIBCPP_END_NAMESPACE_STD
-#endif // _LIBCPP_STD_VER >= 26
+# endif // _LIBCPP_STD_VER >= 26
_LIBCPP_POP_MACROS
diff --git a/libcxx/include/text_encoding b/libcxx/include/text_encoding
index 579608e4f939d..b312e5ac1b9fb 100644
--- a/libcxx/include/text_encoding
+++ b/libcxx/include/text_encoding
@@ -10,7 +10,7 @@
#ifndef _LIBCPP_TEXT_ENCODING
#define _LIBCPP_TEXT_ENCODING
-/* text_encoding synopsis
+/* text_encoding synopsis
namespace std {
struct text_encoding;
@@ -19,7 +19,7 @@ struct text_encoding;
template<class T> struct hash;
template<> struct hash<text_encoding>;
-struct text_encoding
+struct text_encoding
{
static constexpr size_t max_name_length = 63;
@@ -60,9 +60,9 @@ struct text_encoding
#include <__config>
#if _LIBCPP_STD_VER >= 26
-# include <__text_encoding/text_encoding.h>
+# include <__text_encoding/text_encoding.h>
#endif // _LIBCPP_STD_VER >= 26
-#include <version>
+#include <version>
-#endif // _LIBCPP_TEXT_ENCODING
+#endif // _LIBCPP_TEXT_ENCODING
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 6d8dababb84ce..21c93e0939910 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -558,18 +558,18 @@ locale::locale(const locale& other, const locale& one, category c)
string locale::name() const { return __locale_->name(); }
-# if _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
- text_encoding locale::encoding() const {
- std::string __name = this->name();
- if(__name.size() == 1){
- if(__name[0] == 'C')
- return std::text_encoding(text_encoding::id::ASCII);
- if(__name[0] == '*')
- return std::text_encoding();
- }
- return std::text_encoding(__name);
+#if _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
+text_encoding locale::encoding() const {
+ std::string __name = this->name();
+ if (__name.size() == 1) {
+ if (__name[0] == 'C')
+ return std::text_encoding(text_encoding::id::ASCII);
+ if (__name[0] == '*')
+ return std::text_encoding();
}
-# endif // _LIBCPP_STD_VER >= 26
+ return std::text_encoding(__name);
+}
+#endif // _LIBCPP_STD_VER >= 26
void locale::__install_ctor(const locale& other, facet* f, long facet_id) {
if (f)
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index 42812aaed49c7..95b0020ba7690 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include <__config>
+#include <__config>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -14,35 +14,35 @@
#include <__locale_dir/locale_base_api.h>
-#include <text_encoding>
+#include <text_encoding>
#if __has_include(<langinfo.h>)
# include <langinfo.h>
#endif
-#if _LIBCPP_STD_VER >= 26
+#if _LIBCPP_STD_VER >= 26
_LIBCPP_BEGIN_NAMESPACE_STD
-#if __CHAR_BIT__ == 8
+# if __CHAR_BIT__ == 8
text_encoding text_encoding::environment() {
- auto __make_locale = [](const char* __name) {
- text_encoding __enc{};
- if (auto __loc = __locale::__newlocale(LC_CTYPE_MASK, __name, static_cast<locale_t>(0))) {
- if (const char* __codeset = nl_langinfo_l(CODESET, __loc)) {
- string_view __s(__codeset);
- if (__s.size() < max_name_length)
- __enc = text_encoding(__s);
- }
- __locale::__freelocale(__loc);
+ auto __make_locale = [](const char* __name) {
+ text_encoding __enc{};
+ if (auto __loc = __locale::__newlocale(LC_CTYPE_MASK, __name, static_cast<locale_t>(0))) {
+ if (const char* __codeset = nl_langinfo_l(CODESET, __loc)) {
+ string_view __s(__codeset);
+ if (__s.size() < max_name_length)
+ __enc = text_encoding(__s);
}
- return __enc;
- };
+ __locale::__freelocale(__loc);
+ }
+ return __enc;
+ };
- return __make_locale("");
- }
+ return __make_locale("");
+}
-# endif // __CHAR_BIT__ == 8
+# endif // __CHAR_BIT__ == 8
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
index 6562971c73dbb..f01c3956e4736 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
@@ -41,15 +41,15 @@ int main() {
static_assert(std::is_nothrow_constructible<std::text_encoding, std::text_encoding::id>::value,
"Must be nothrow constructible with id");
}
-
+
{
- for (auto pair : unique_encoding_data){
+ for (auto pair : unique_encoding_data) {
test_ctor(te_id{pair.mib}, te_id{pair.mib}, pair.name);
}
}
{
- for(int i = 2261; i < 2300; i++){ // test out of range id values
+ for (int i = 2261; i < 2300; i++) { // test out of range id values
test_ctor(te_id{i}, te_id::unknown, "");
}
}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
index 30ce1badec1d7..bf0c75a3ebbb5 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
@@ -14,7 +14,7 @@
// class text_encoding
-// text_encoding operator==(const text_encoding&, id) _NOEXCEPT
+// text_encoding operator==(const text_encoding&, id) _NOEXCEPT
// Concerns:
// 1. operator==(const text_encoding&, id) must be noexcept
@@ -31,7 +31,6 @@
using id = std::text_encoding::id;
int main() {
-
{ // 1
auto te = std::text_encoding();
ASSERT_NOEXCEPT(te == id::UTF8);
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
index 81fdb0a681143..27cd9bdff08a5 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
@@ -14,7 +14,7 @@
// class text_encoding
-// text_encoding operator==(const text_encoding&, const text_encoding&) _NOEXCEPT
+// text_encoding operator==(const text_encoding&, const text_encoding&) _NOEXCEPT
// Concerns:
// 1. operator==(const text_encoding&, const text_encoding&) must be noexcept
@@ -32,8 +32,7 @@
using id = std::text_encoding::id;
-int main(){
-
+int main() {
{ // 1
auto te1 = std::text_encoding();
auto te2 = std::text_encoding();
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
index 345438d6a1ecb..6b363ef7555e6 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
@@ -13,10 +13,10 @@
// class text_encoding
-// text_encoding text_encoding::environment();
+// text_encoding text_encoding::environment();
// Concerns:
-// 1. Verify that text_encoding::aliases_view satisfies ranges::forward_range, copyable, view,
+// 1. Verify that text_encoding::aliases_view satisfies ranges::forward_range, copyable, view,
// ranges::random_access_range and ranges::borrowed_range
#include <concepts>
@@ -24,11 +24,11 @@
#include <text_encoding>
#include <type_traits>
-#include "platform_support.h"
+#include "platform_support.h"
#include "test_macros.h"
#include "test_text_encoding.h"
-int main(){
+int main() {
static_assert(std::ranges::forward_range<std::text_encoding::aliases_view>);
static_assert(std::copyable<std::text_encoding::aliases_view>);
static_assert(std::ranges::view<std::text_encoding::aliases_view>);
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
index c0412d4d39ee1..cba72e45f0666 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
@@ -15,18 +15,18 @@
// UNSUPPORTED: windows
// libc++ is not built with C++26, and the implementation for this function is in a source file.
-// XFAIL: *
+// XFAIL: *
// class text_encoding
-// text_encoding text_encoding::environment();
+// text_encoding text_encoding::environment();
// Concerns:
// 1. text_encoding::environment() returns the encoding for the "C" locale, which should be the default for any C++ program.
// 2. text_encoding::environment() still returns the "C" locale encoding when the locale is set to "en_US.UTF-8".
-// 3. text_encoding::environment() is affected by changes to the "LANG" environment variable.
+// 3. text_encoding::environment() is affected by changes to the "LANG" environment variable.
-// The current implementation of text_encoding::environment() while conformant,
+// The current implementation of text_encoding::environment() while conformant,
// is unfortunately affected by changes to the "LANG" environment variable.
#include <cassert>
@@ -35,14 +35,13 @@
#include <string_view>
#include <text_encoding>
-#include "platform_support.h"
+#include "platform_support.h"
#include "test_macros.h"
#include "test_text_encoding.h"
int main() {
-
{ // 1
- auto te = std::text_encoding::environment();
+ auto te = std::text_encoding::environment();
assert(te == std::text_encoding::environment());
assert(te.mib() == std::text_encoding::id::ASCII);
@@ -68,7 +67,7 @@ int main() {
{ // 3
setenv("LANG", LOCALE_en_US_UTF_8, 1);
-
+
auto te = std::text_encoding::environment();
assert(te == std::text_encoding::environment());
@@ -78,6 +77,6 @@ int main() {
assert(std::text_encoding::environment_is<std::text_encoding::id::UTF8>());
}
-
+
return 0;
}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
index 200f9630de580..4aa6d793000b3 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
@@ -30,7 +30,7 @@
int main() {
#if __CHAR_BIT__ == 8
-
+
{
auto te = std::text_encoding::literal();
# ifdef __GNUC_EXECUTION_CHARSET_NAME
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
index 61c148f854813..ecb98403fc821 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
@@ -14,53 +14,50 @@
// class text_encoding
-// text_encoding text_encoding::environment();
+// text_encoding text_encoding::environment();
// Concerns:
// 1. begin() of an aliases_view() from a single text_encoding object are the same.
// 2. begin() of aliases_views of two text_encoding objects that represent the same ID but hold different names are the same.
-// 3. begin() of aliases_views of two text_encoding objects that represent different IDs are different.
+// 3. begin() of aliases_views of two text_encoding objects that represent different IDs are different.
#include <cassert>
#include <cstdlib>
#include <text_encoding>
#include <ranges>
-#include "platform_support.h"
+#include "platform_support.h"
#include "test_macros.h"
#include "test_text_encoding.h"
using id = std::text_encoding::id;
int main() {
-
{
- auto te = std::text_encoding(id::UTF8);
+ auto te = std::text_encoding(id::UTF8);
auto view1 = te.aliases();
auto view2 = te.aliases();
-
- assert(std::ranges::begin(view1) == std::ranges::begin(view2));
+
+ assert(std::ranges::begin(view1) == std::ranges::begin(view2));
}
{
auto te1 = std::text_encoding("ANSI_X3.4-1968");
auto te2 = std::text_encoding("ANSI_X3.4-1986");
- auto view1 = te1.aliases();
+ auto view1 = te1.aliases();
auto view2 = te2.aliases();
assert(std::ranges::begin(view1) == std::ranges::begin(view2));
}
{
-
auto te1 = std::text_encoding(id::UTF8);
auto te2 = std::text_encoding(id::ASCII);
-
+
auto view1 = te1.aliases();
auto view2 = te2.aliases();
assert(!(std::ranges::begin(view1) == std::ranges::begin(view2)));
}
-
}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
index 253a060dc0961..6105655351675 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
@@ -14,7 +14,7 @@
// class text_encoding
-// text_encoding text_encoding::environment();
+// text_encoding text_encoding::environment();
// Concerns:
// 1. An alias_view of a text_encoding object for "other" is empty
@@ -26,25 +26,24 @@
#include <ranges>
#include <text_encoding>
-#include "platform_support.h"
+#include "platform_support.h"
#include "test_macros.h"
#include "test_text_encoding.h"
using id = std::text_encoding::id;
-int main(){
-
+int main() {
{
- auto te = std::text_encoding(id::other);
+ auto te = std::text_encoding(id::other);
auto empty_range = te.aliases();
-
+
assert(std::ranges::empty(empty_range));
assert(empty_range.empty());
assert(!bool(empty_range));
}
{
- auto te = std::text_encoding(id::unknown);
+ auto te = std::text_encoding(id::unknown);
auto empty_range = te.aliases();
assert(std::ranges::empty(empty_range));
@@ -53,12 +52,11 @@ int main(){
}
{
- auto te = std::text_encoding(id::UTF8);
+ auto te = std::text_encoding(id::UTF8);
auto range = te.aliases();
assert(!std::ranges::empty(range));
assert(!range.empty());
assert(bool(range));
}
-
}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
index fb9f80247b1ac..9066e1e9f8da9 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
@@ -14,31 +14,30 @@
// class text_encoding
-// text_encoding text_encoding::environment();
+// text_encoding text_encoding::environment();
// Concerns:
// 1. An aliases_view from a single text_encoding object returns the same front()
// 2. The aliases_views of two text_encoding objects that represent the same ID but hold different names return the same front()
-// 3. An aliases_views of two text_encoding objects that represent different IDs return different front()
+// 3. The aliases_views of two text_encoding objects that represent different IDs return different front()
#include <cassert>
#include <cstdlib>
#include <text_encoding>
-#include "platform_support.h"
+#include "platform_support.h"
#include "test_macros.h"
#include "test_text_encoding.h"
using id = std::text_encoding::id;
int main() {
-
{
auto te = std::text_encoding(id::UTF8);
-
+
auto view1 = te.aliases();
auto view2 = te.aliases();
-
+
assert(view1.front() == view2.front());
}
@@ -46,21 +45,19 @@ int main() {
auto te1 = std::text_encoding("ANSI_X3.4-1968");
auto te2 = std::text_encoding("ANSI_X3.4-1986");
- auto view1 = te1.aliases();
+ auto view1 = te1.aliases();
auto view2 = te2.aliases();
assert(view1.front() == view2.front());
}
{
-
auto te1 = std::text_encoding(id::UTF8);
auto te2 = std::text_encoding(id::ASCII);
-
+
auto view1 = te1.aliases();
auto view2 = te2.aliases();
assert(!(view1.front() == view2.front()));
}
-
}
>From 38c97df18df0be1fc7859f7cef79ada14b4c928e Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Fri, 23 May 2025 23:20:55 -0400
Subject: [PATCH 03/54] Fix most causes of failing CI
---
libcxx/include/text_encoding | 12 +++++++-----
libcxx/test/libcxx/transitive_includes/cxx26.csv | 1 -
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/libcxx/include/text_encoding b/libcxx/include/text_encoding
index b312e5ac1b9fb..b41913a57bd72 100644
--- a/libcxx/include/text_encoding
+++ b/libcxx/include/text_encoding
@@ -23,7 +23,7 @@ struct text_encoding
{
static constexpr size_t max_name_length = 63;
- // [text.encoding.id], enumeration text_encoding::id
+ // [text.encoding.id], enumeration text_encoding::id
enum class id : int_least32_t {
see below
};
@@ -36,8 +36,8 @@ struct text_encoding
constexpr id mib() const noexcept;
constexpr const char* name() const noexcept;
- // [text.encoding.aliases], class text_encoding::aliases_view
- struct aliases_view;
+ // [text.encoding.aliases], class text_encoding::aliases_view
+ // struct aliases_view;
constexpr aliases_view aliases() const noexcept;
friend constexpr bool operator==(const text_encoding& a,
@@ -59,10 +59,12 @@ struct text_encoding
#include <__config>
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
#if _LIBCPP_STD_VER >= 26
# include <__text_encoding/text_encoding.h>
#endif // _LIBCPP_STD_VER >= 26
-#include <version>
-
#endif // _LIBCPP_TEXT_ENCODING
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 24e57ab696857..a795b872a6646 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -1066,7 +1066,6 @@ system_error string_view
system_error tuple
system_error version
text_encoding cctype
-text_encoding clocale
text_encoding compare
text_encoding cstdint
text_encoding cstdio
>From 14fad44bbdad6e1b7d20e09ceb170908eb98c925 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Fri, 23 May 2025 23:54:20 -0400
Subject: [PATCH 04/54] Clang-Format again
---
.../text_encoding.version.compile.pass.cpp | 1 -
.../locales/locale/locale.members/encoding.pass.cpp | 9 ++++-----
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp
index 817b0f0d655db..1678e8840af8d 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp
@@ -60,4 +60,3 @@
#endif // TEST_STD_VER > 23
// clang-format on
-
diff --git a/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
index 7ebfb5a0b4f74..806a025b82e2d 100644
--- a/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
@@ -10,7 +10,7 @@
// <text_encoding>
// libc++ not built with C++26 yet
-// XFAIL: *
+// XFAIL: *
// REQUIRES: std-at-least-c++26
// REQUIRES: locale.en_US.UTF-8
// UNSUPPORTED: no-localization
@@ -33,11 +33,10 @@
using id = std::text_encoding::id;
int main() {
-
{
std::locale loc;
- auto te = loc.encoding();
+ auto te = loc.encoding();
auto classicTE = std::text_encoding(id::ASCII);
assert(te == id::ASCII);
assert(te == classicTE);
@@ -46,11 +45,11 @@ int main() {
{
std::locale utf8Locale(LOCALE_en_US_UTF_8);
- auto te = utf8Locale.encoding();
+ auto te = utf8Locale.encoding();
auto utf8TE = std::text_encoding(id::UTF8);
assert(te == id::UTF8);
assert(te == utf8TE);
}
- return 0;
+ return 0;
}
>From e247aeee3417d73d3aa70edfa44e56ba7e6ef289 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:00:29 -0400
Subject: [PATCH 05/54] Guard iterator operator* and operator- behind
_LIBCPP_ASSERTs
---
libcxx/include/__text_encoding/text_encoding.h | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 5edc631f7acfd..fee482aed0bc7 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -21,6 +21,7 @@
# include <__algorithm/copy_n.h>
# include <__algorithm/lower_bound.h>
# include <__algorithm/min.h>
+# include <__assert>
# include <__functional/hash.h>
# include <__iterator/iterator_traits.h>
# include <__locale_dir/locale_base_api.h>
@@ -347,9 +348,8 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
_LIBCPP_HIDE_FROM_ABI constexpr __iterator() noexcept = default;
_LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const {
- if (__can_dereference())
- return __data_->__name;
- std::unreachable();
+ _LIBCPP_ASSERT(__can_dereference(), "Dereferencing invalid aliases_view iterator!");
+ return __data_->__name;
}
_LIBCPP_HIDE_FROM_ABI constexpr value_type operator[](difference_type __n) const {
@@ -373,9 +373,8 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
}
_LIBCPP_HIDE_FROM_ABI constexpr difference_type operator-(const __iterator& __other) const {
- if (__other.__mib_rep_ == __mib_rep_)
- return __mib_rep_ - __other.__mib_rep_;
- std::unreachable();
+ _LIBCPP_ASSERT(__other.__mib_rep_ == __mib_rep_, "Subtracting ranges of two different text encodings!");
+ return __mib_rep_ - __other.__mib_rep_;
}
_LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(difference_type __n, __iterator& __it) {
>From 10ac11104be32a0fefb5086670088edf880c78a5 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:01:13 -0400
Subject: [PATCH 06/54] Remove useless [[__likely__]]
---
libcxx/include/__text_encoding/text_encoding.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index fee482aed0bc7..67423948ac89f 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -407,7 +407,7 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
// Check if going past the encoding data list array and if the new index has the same id, if not then
// replace it with a sentinel "out-of-bounds" iterator.
_LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __n) {
- if (__data_) [[__likely__]] {
+ if (__data_) {
if (__n > 0) {
if ((__data_ + __n) < std::end(__text_encoding_data) && __data_[__n - 1].__mib_rep == __mib_rep_)
__data_ += __n;
>From 67efeb8b77c02629c992a2f93afb75cdfba386b1 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:03:58 -0400
Subject: [PATCH 07/54] Replace _NOEXCEPT with noexcept
---
.../include/__text_encoding/text_encoding.h | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 67423948ac89f..3c5fd6707b3fb 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -46,11 +46,11 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
__id_rep __mib_rep;
const char* __name;
- friend constexpr bool operator==(const __encoding_data& __e, const __encoding_data& __other) _NOEXCEPT {
+ friend constexpr bool operator==(const __encoding_data& __e, const __encoding_data& __other) noexcept {
return __e.__mib_rep == __other.__mib_rep || __comp_name(__e.__name, __other.__name);
}
- friend constexpr bool operator<(const __encoding_data& __e, const __id_rep __i) _NOEXCEPT {
+ friend constexpr bool operator<(const __encoding_data& __e, const __id_rep __i) noexcept {
return __e.__mib_rep < __i;
}
};
@@ -323,17 +323,17 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
using enum id;
_LIBCPP_HIDE_FROM_ABI constexpr text_encoding() = default;
- _LIBCPP_HIDE_FROM_ABI constexpr explicit text_encoding(string_view __enc) _NOEXCEPT
+ _LIBCPP_HIDE_FROM_ABI constexpr explicit text_encoding(string_view __enc) noexcept
: __encoding_rep_(__find_encoding_data(__enc)) {
__enc.copy(__name_, max_name_length, 0);
}
- _LIBCPP_HIDE_FROM_ABI constexpr text_encoding(id __i) _NOEXCEPT : __encoding_rep_(__find_encoding_data_by_id(__i)) {
+ _LIBCPP_HIDE_FROM_ABI constexpr text_encoding(id __i) noexcept : __encoding_rep_(__find_encoding_data_by_id(__i)) {
if (__encoding_rep_->__name[0] != '\0')
std::copy_n(__encoding_rep_->__name, std::char_traits<char>::length(__encoding_rep_->__name), __name_);
}
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr id mib() const _NOEXCEPT { return id(__encoding_rep_->__mib_rep); }
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const char* name() const _NOEXCEPT { return __name_; }
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr id mib() const noexcept { return id(__encoding_rep_->__mib_rep); }
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const char* name() const noexcept { return __name_; }
// [text.encoding.aliases], class text_encoding::aliases_view
struct aliases_view : ranges::view_interface<aliases_view> {
@@ -453,7 +453,7 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
const __encoding_data* __view_data_ = nullptr;
};
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr aliases_view aliases() const _NOEXCEPT {
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr aliases_view aliases() const noexcept {
auto __rep = __encoding_rep_ - 1;
if (__encoding_rep_->__name[0]) {
while (__rep > std::begin(__text_encoding_data) && (__rep--)->__mib_rep == __encoding_rep_->__mib_rep)
@@ -465,19 +465,19 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
return aliases_view(__rep);
}
- _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const text_encoding& __a, const text_encoding& __b) _NOEXCEPT {
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const text_encoding& __a, const text_encoding& __b) noexcept {
if (__a.mib() == id::other && __b.mib() == id::other)
return __comp_name(__a.__name_, __b.__name_);
return __a.mib() == __b.mib();
}
- _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const text_encoding& __encoding, id __i) _NOEXCEPT {
+ _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const text_encoding& __encoding, id __i) noexcept {
return __encoding.mib() == __i;
}
# if __CHAR_BIT__ == 8
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static consteval text_encoding literal() _NOEXCEPT {
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static consteval text_encoding literal() noexcept {
# ifdef __GNUC_EXECUTION_CHARSET_NAME
return text_encoding(__GNUC_EXECUTION_CHARSET_NAME);
# elif defined(__clang_literal_encoding__)
>From d2e630736793de8b41c4524cd39b8151c1a3113b Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:12:45 -0400
Subject: [PATCH 08/54] Remove __CHAR_BIT__ == 8 checks
---
libcxx/include/__locale | 2 +-
libcxx/include/__text_encoding/text_encoding.h | 8 --------
libcxx/src/locale.cpp | 2 +-
libcxx/src/text_encoding.cpp | 3 ---
4 files changed, 2 insertions(+), 13 deletions(-)
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 6f6091d08026f..7001c7af35b62 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -104,7 +104,7 @@ public:
// locale operations:
string name() const;
-# if _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
+# if _LIBCPP_STD_VER >= 26
text_encoding encoding() const;
# endif // _LIBCPP_STD_VER >= 26
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 3c5fd6707b3fb..7b6307c32b07f 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -476,7 +476,6 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
return __encoding.mib() == __i;
}
-# if __CHAR_BIT__ == 8
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static consteval text_encoding literal() noexcept {
# ifdef __GNUC_EXECUTION_CHARSET_NAME
return text_encoding(__GNUC_EXECUTION_CHARSET_NAME);
@@ -496,13 +495,6 @@ struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
return environment() == __i;
}
-# else
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static consteval text_encoding literal() = delete;
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static text_encoding environment() = delete;
- template <id __i>
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static bool environment_is() = delete;
-# endif
-
private:
_LIBCPP_HIDE_FROM_ABI static constexpr bool __comp_name(string_view __a, string_view __b) {
if (__a.empty() || __b.empty()) {
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 21c93e0939910..a91f3cc1fcefa 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -558,7 +558,7 @@ locale::locale(const locale& other, const locale& one, category c)
string locale::name() const { return __locale_->name(); }
-#if _LIBCPP_STD_VER >= 26 && __CHAR_BIT__ == 8
+#if _LIBCPP_STD_VER >= 26
text_encoding locale::encoding() const {
std::string __name = this->name();
if (__name.size() == 1) {
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index 95b0020ba7690..7557d28ab0fd8 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -23,7 +23,6 @@
#if _LIBCPP_STD_VER >= 26
_LIBCPP_BEGIN_NAMESPACE_STD
-# if __CHAR_BIT__ == 8
text_encoding text_encoding::environment() {
auto __make_locale = [](const char* __name) {
@@ -42,8 +41,6 @@ text_encoding text_encoding::environment() {
return __make_locale("");
}
-# endif // __CHAR_BIT__ == 8
-
_LIBCPP_END_NAMESPACE_STD
#endif // _LIBCPP_STD_VER >= 26
>From c64eeeb12b0bd03d8c577734c28d8bff4d3407cb Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:13:37 -0400
Subject: [PATCH 09/54] Remove __locale_dir include in header, since it would
only be required in source file
---
libcxx/include/__text_encoding/text_encoding.h | 1 -
1 file changed, 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 7b6307c32b07f..1487d96349175 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -24,7 +24,6 @@
# include <__assert>
# include <__functional/hash.h>
# include <__iterator/iterator_traits.h>
-# include <__locale_dir/locale_base_api.h>
# include <__ranges/view_interface.h>
# include <__string/char_traits.h>
# include <__utility/unreachable.h>
>From cfc02e421ad47c40acf84f06cfba1fa7c998dc5a Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:16:08 -0400
Subject: [PATCH 10/54] Remove unnecessary _LIBCPP_EXPORTED_FROM_ABI
---
libcxx/include/__text_encoding/text_encoding.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 1487d96349175..21a3b1ff4382f 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -36,7 +36,7 @@ _LIBCPP_PUSH_MACROS
# if _LIBCPP_STD_VER >= 26
_LIBCPP_BEGIN_NAMESPACE_STD
-struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
+struct text_encoding {
static constexpr size_t max_name_length = 63;
private:
>From d5b8d92c5bab4c1ee1760aab2c4c16c58815a312 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:30:56 -0400
Subject: [PATCH 11/54] Copy id enum specification from standard
---
libcxx/include/__text_encoding/text_encoding.h | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 21a3b1ff4382f..75f8908693265 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -88,8 +88,6 @@ struct text_encoding {
ISO2IntlRefVersion = 30,
NATSSEFI = 31,
NATSSEFIADD = 32,
- NATSDANO = 33,
- NATSDANOADD = 34,
ISO10Swedish = 35,
KSC56011987 = 36,
ISO2022KR = 37,
@@ -255,7 +253,7 @@ struct text_encoding {
IBM880 = 2057,
IBM891 = 2058,
IBM903 = 2059,
- IBBM904 = 2060,
+ IBM904 = 2060,
IBM905 = 2061,
IBM918 = 2062,
IBM1026 = 2063,
@@ -315,8 +313,7 @@ struct text_encoding {
windows1257 = 2257,
windows1258 = 2258,
TIS620 = 2259,
- CP50220 = 2260,
- reserved = 3000
+ CP50220 = 2260
};
using enum id;
@@ -705,12 +702,6 @@ struct text_encoding {
{32, "NATS-SEFI-ADD"},
{32, "csNATSSEFIADD"},
{32, "iso-ir-8-2"},
- {33, "NATS-DANO"},
- {33, "csNATSDANO"},
- {33, "iso-ir-9-1"},
- {34, "NATS-DANO-ADD"},
- {34, "csNATSDANOADD"},
- {34, "iso-ir-9-2"},
{35, "SEN_850200_B"},
{35, "FI"},
{35, "ISO646-FI"},
>From e7aaa6b1ef26e2d312f349ac293e8c13ee390968 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:43:54 -0400
Subject: [PATCH 12/54] Remove #ifdef __clang__ since
__clang_literal_encoding__ already indicates clang
---
libcxx/include/__text_encoding/text_encoding.h | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 75f8908693265..b11b9b3307378 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -473,15 +473,14 @@ struct text_encoding {
}
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static consteval text_encoding literal() noexcept {
-# ifdef __GNUC_EXECUTION_CHARSET_NAME
+ // TODO: Remove this branch once we have __GNUC_EXECUTION_CHARSET_NAME or __clang_literal_encoding__ unconditionally
+# ifdef __GNUC_EXECUTION_CHARSET_NAME
return text_encoding(__GNUC_EXECUTION_CHARSET_NAME);
-# elif defined(__clang_literal_encoding__)
+# elif defined(__clang_literal_encoding__)
return text_encoding(__clang_literal_encoding__);
-# elif defined(__clang__)
- return text_encoding(id::UTF8);
-# else
- return {};
-# endif
+# else
+ return text_encoding();
+# endif
}
[[nodiscard]] static text_encoding environment();
>From 95dc46cce050847015f0a36d5853b68e3766d0db Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 13:45:45 -0400
Subject: [PATCH 13/54] Rename __encoding_data members
---
.../include/__text_encoding/text_encoding.h | 34 +++++++++----------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index b11b9b3307378..cac173187e3ef 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -42,15 +42,15 @@ struct text_encoding {
private:
struct __encoding_data {
using __id_rep _LIBCPP_NODEBUG = int_least32_t;
- __id_rep __mib_rep;
- const char* __name;
+ __id_rep __mib_rep_;
+ const char* __name_;
friend constexpr bool operator==(const __encoding_data& __e, const __encoding_data& __other) noexcept {
- return __e.__mib_rep == __other.__mib_rep || __comp_name(__e.__name, __other.__name);
+ return __e.__mib_rep_ == __other.__mib_rep_ || __comp_name(__e.__name_, __other.__name_);
}
friend constexpr bool operator<(const __encoding_data& __e, const __id_rep __i) noexcept {
- return __e.__mib_rep < __i;
+ return __e.__mib_rep_ < __i;
}
};
@@ -324,11 +324,11 @@ struct text_encoding {
__enc.copy(__name_, max_name_length, 0);
}
_LIBCPP_HIDE_FROM_ABI constexpr text_encoding(id __i) noexcept : __encoding_rep_(__find_encoding_data_by_id(__i)) {
- if (__encoding_rep_->__name[0] != '\0')
- std::copy_n(__encoding_rep_->__name, std::char_traits<char>::length(__encoding_rep_->__name), __name_);
+ if (__encoding_rep_->__name_[0] != '\0')
+ std::copy_n(__encoding_rep_->__name_, std::char_traits<char>::length(__encoding_rep_->__name_), __name_);
}
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr id mib() const noexcept { return id(__encoding_rep_->__mib_rep); }
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr id mib() const noexcept { return id(__encoding_rep_->__mib_rep_); }
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const char* name() const noexcept { return __name_; }
// [text.encoding.aliases], class text_encoding::aliases_view
@@ -345,7 +345,7 @@ struct text_encoding {
_LIBCPP_HIDE_FROM_ABI constexpr value_type operator*() const {
_LIBCPP_ASSERT(__can_dereference(), "Dereferencing invalid aliases_view iterator!");
- return __data_->__name;
+ return __data_->__name_;
}
_LIBCPP_HIDE_FROM_ABI constexpr value_type operator[](difference_type __n) const {
@@ -405,12 +405,12 @@ struct text_encoding {
_LIBCPP_HIDE_FROM_ABI constexpr __iterator& operator+=(difference_type __n) {
if (__data_) {
if (__n > 0) {
- if ((__data_ + __n) < std::end(__text_encoding_data) && __data_[__n - 1].__mib_rep == __mib_rep_)
+ if ((__data_ + __n) < std::end(__text_encoding_data) && __data_[__n - 1].__mib_rep_ == __mib_rep_)
__data_ += __n;
else
*this = __iterator{};
} else if (__n < 0) {
- if ((__data_ + __n) > __text_encoding_data && __data_[__n].__mib_rep == __mib_rep_)
+ if ((__data_ + __n) > __text_encoding_data && __data_[__n].__mib_rep_ == __mib_rep_)
__data_ += __n;
else
*this = __iterator{};
@@ -433,9 +433,9 @@ struct text_encoding {
friend struct text_encoding;
_LIBCPP_HIDE_FROM_ABI constexpr __iterator(const __encoding_data* __enc_d) noexcept
- : __data_(__enc_d), __mib_rep_(__enc_d ? __enc_d->__mib_rep : 0) {}
+ : __data_(__enc_d), __mib_rep_(__enc_d ? __enc_d->__mib_rep_ : 0) {}
- _LIBCPP_HIDE_FROM_ABI bool __can_dereference() const { return __data_ && __data_->__mib_rep == __mib_rep_; }
+ _LIBCPP_HIDE_FROM_ABI bool __can_dereference() const { return __data_ && __data_->__mib_rep_ == __mib_rep_; }
// default iterator is a sentinel
const __encoding_data* __data_ = nullptr;
@@ -451,8 +451,8 @@ struct text_encoding {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr aliases_view aliases() const noexcept {
auto __rep = __encoding_rep_ - 1;
- if (__encoding_rep_->__name[0]) {
- while (__rep > std::begin(__text_encoding_data) && (__rep--)->__mib_rep == __encoding_rep_->__mib_rep)
+ if (__encoding_rep_->__name_[0]) {
+ while (__rep > std::begin(__text_encoding_data) && (__rep--)->__mib_rep_ == __encoding_rep_->__mib_rep_)
;
} else {
__rep = nullptr;
@@ -532,9 +532,9 @@ struct text_encoding {
auto __data_ptr = __text_encoding_data + 2, __data_last = std::end(__text_encoding_data) - 1;
for (; __data_ptr != __data_last; __data_ptr++) {
- if (__comp_name(__a, __data_ptr->__name)) {
- const auto __found_id = __data_ptr->__mib_rep;
- while (__data_ptr[-1].__mib_rep == __found_id)
+ if (__comp_name(__a, __data_ptr->__name_)) {
+ const auto __found_id = __data_ptr->__mib_rep_;
+ while (__data_ptr[-1].__mib_rep_ == __found_id)
__data_ptr--;
return __data_ptr;
}
>From 1660c46a751b2ae36511a737645fe1a5b98f079f Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 15:08:34 -0400
Subject: [PATCH 14/54] Add precondition asserts for text_encoding constructors
---
libcxx/include/__text_encoding/text_encoding.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index cac173187e3ef..fbfb8712aef30 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -529,6 +529,7 @@ struct text_encoding {
}
_LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data(string_view __a) {
+ _LIBCPP_ASSERT(__a.size() <= max_name_length, "Passing encoding name longer than max_name_length!");
auto __data_ptr = __text_encoding_data + 2, __data_last = std::end(__text_encoding_data) - 1;
for (; __data_ptr != __data_last; __data_ptr++) {
@@ -544,6 +545,7 @@ struct text_encoding {
}
_LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data_by_id(id __i) {
+ _LIBCPP_ASSERT(__i >= id::other && __i <= id::CP50220, "Passing invalid id to text_encoding constructor!");
auto __found = std::lower_bound(
std::begin(__text_encoding_data), std::end(__text_encoding_data) - 1, __encoding_data::__id_rep(__i));
return __found != std::end(__text_encoding_data) - 1 ? __found : __text_encoding_data + 1; // unknown
>From 0727a20a5aeca0b8707e79a1e41fe1d0c4096c2c Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 15:17:57 -0400
Subject: [PATCH 15/54] Change nullptr sentinel __encoding_data rep value to
9999
---
libcxx/include/__text_encoding/text_encoding.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index fbfb8712aef30..edef94fb16070 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -1434,7 +1434,7 @@ struct text_encoding {
{2259, "csTIS620"},
{2260, "CP50220"},
{2260, "csCP50220"},
- {0, nullptr} // sentinel
+ {9999, nullptr} // sentinel
};
const __encoding_data* __encoding_rep_ = __text_encoding_data + 1;
>From b53010806cec9751941b3054e09ae7c7dc785d20 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 18:46:01 -0400
Subject: [PATCH 16/54] Remove localization guard
---
.../include/__text_encoding/text_encoding.h | 40 +++++++++----------
1 file changed, 18 insertions(+), 22 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index edef94fb16070..0e82c8c7ac9b1 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -16,24 +16,22 @@
# pragma GCC system_header
#endif
-#if _LIBCPP_HAS_LOCALIZATION
-
-# include <__algorithm/copy_n.h>
-# include <__algorithm/lower_bound.h>
-# include <__algorithm/min.h>
-# include <__assert>
-# include <__functional/hash.h>
-# include <__iterator/iterator_traits.h>
-# include <__ranges/view_interface.h>
-# include <__string/char_traits.h>
-# include <__utility/unreachable.h>
-# include <cstdint>
-# include <string_view>
+#include <__algorithm/copy_n.h>
+#include <__algorithm/lower_bound.h>
+#include <__algorithm/min.h>
+#include <__assert>
+#include <__functional/hash.h>
+#include <__iterator/iterator_traits.h>
+#include <__ranges/view_interface.h>
+#include <__string/char_traits.h>
+#include <__utility/unreachable.h>
+#include <cstdint>
+#include <string_view>
_LIBCPP_PUSH_MACROS
-# include <__undef_macros>
+#include <__undef_macros>
-# if _LIBCPP_STD_VER >= 26
+#if _LIBCPP_STD_VER >= 26
_LIBCPP_BEGIN_NAMESPACE_STD
struct text_encoding {
@@ -474,13 +472,13 @@ struct text_encoding {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static consteval text_encoding literal() noexcept {
// TODO: Remove this branch once we have __GNUC_EXECUTION_CHARSET_NAME or __clang_literal_encoding__ unconditionally
-# ifdef __GNUC_EXECUTION_CHARSET_NAME
+# ifdef __GNUC_EXECUTION_CHARSET_NAME
return text_encoding(__GNUC_EXECUTION_CHARSET_NAME);
-# elif defined(__clang_literal_encoding__)
+# elif defined(__clang_literal_encoding__)
return text_encoding(__clang_literal_encoding__);
-# else
+# else
return text_encoding();
-# endif
+# endif
}
[[nodiscard]] static text_encoding environment();
@@ -1455,10 +1453,8 @@ inline constexpr bool enable_borrowed_range<text_encoding::aliases_view> = true;
_LIBCPP_END_NAMESPACE_STD
-# endif // _LIBCPP_STD_VER >= 26
+#endif // _LIBCPP_STD_VER >= 26
_LIBCPP_POP_MACROS
-#endif // _LIBCPP_HAS_LOCALIZATION
-
#endif // _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_H
>From 3a1ab1dcb4386b7e33574ffb805ce5375ccb15d7 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 23:21:59 -0400
Subject: [PATCH 17/54] Move __id_rep outside __encoding_data for convenience
---
libcxx/include/__text_encoding/text_encoding.h | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 0e82c8c7ac9b1..cf7351f76c21a 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -38,8 +38,8 @@ struct text_encoding {
static constexpr size_t max_name_length = 63;
private:
+ using __id_rep _LIBCPP_NODEBUG = int_least32_t;
struct __encoding_data {
- using __id_rep _LIBCPP_NODEBUG = int_least32_t;
__id_rep __mib_rep_;
const char* __name_;
@@ -53,7 +53,7 @@ struct text_encoding {
};
public:
- enum class id : __encoding_data::__id_rep {
+ enum class id : __id_rep {
other = 1,
unknown = 2,
ASCII = 3,
@@ -437,7 +437,7 @@ struct text_encoding {
// default iterator is a sentinel
const __encoding_data* __data_ = nullptr;
- __encoding_data::__id_rep __mib_rep_ = 0;
+ __id_rep __mib_rep_ = 0;
};
constexpr __iterator begin() const { return __iterator{__view_data_}; }
@@ -545,8 +545,10 @@ struct text_encoding {
_LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data_by_id(id __i) {
_LIBCPP_ASSERT(__i >= id::other && __i <= id::CP50220, "Passing invalid id to text_encoding constructor!");
auto __found = std::lower_bound(
- std::begin(__text_encoding_data), std::end(__text_encoding_data) - 1, __encoding_data::__id_rep(__i));
- return __found != std::end(__text_encoding_data) - 1 ? __found : __text_encoding_data + 1; // unknown
+ std::begin(__text_encoding_data), std::end(__text_encoding_data), __id_rep(__i));
+ return __found != std::end(__text_encoding_data)
+ ? __found
+ : __text_encoding_data + 1; // only possible way to get unknown is if 33, 34 are passed
}
_LIBCPP_HIDE_FROM_ABI static constexpr __encoding_data __text_encoding_data[] = {
>From bba8c683073602b3a6292ac6cb6ad71f186a4696 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 23:38:37 -0400
Subject: [PATCH 18/54] Add name size field to __encoding_data
---
.../include/__text_encoding/text_encoding.h | 1769 ++++++++---------
1 file changed, 884 insertions(+), 885 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index cf7351f76c21a..a8d570ae58e25 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -42,6 +42,7 @@ struct text_encoding {
struct __encoding_data {
__id_rep __mib_rep_;
const char* __name_;
+ uint_least32_t __name_size_;
friend constexpr bool operator==(const __encoding_data& __e, const __encoding_data& __other) noexcept {
return __e.__mib_rep_ == __other.__mib_rep_ || __comp_name(__e.__name_, __other.__name_);
@@ -544,897 +545,895 @@ struct text_encoding {
_LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data_by_id(id __i) {
_LIBCPP_ASSERT(__i >= id::other && __i <= id::CP50220, "Passing invalid id to text_encoding constructor!");
- auto __found = std::lower_bound(
- std::begin(__text_encoding_data), std::end(__text_encoding_data), __id_rep(__i));
+ auto __found = std::lower_bound(std::begin(__text_encoding_data), std::end(__text_encoding_data), __id_rep(__i));
return __found != std::end(__text_encoding_data)
? __found
: __text_encoding_data + 1; // only possible way to get unknown is if 33, 34 are passed
}
_LIBCPP_HIDE_FROM_ABI static constexpr __encoding_data __text_encoding_data[] = {
- {1, ""},
- {2, ""},
- {3, "ANSI_X3.4-1968"},
- {3, "ANSI_X3.4-1986"},
- {3, "IBM367"},
- {3, "ISO646-US"},
- {3, "ISO_646.irv:1991"},
- {3, "cp367"},
- {3, "csASCII"},
- {3, "iso-ir-6"},
- {3, "us"},
- {4, "ISO-8859-1"},
- {4, "ISO_8859-1:1987"},
- {4, "CP819"},
- {4, "IBM819"},
- {4, "ISO_8859-1"},
- {4, "csISOLatin1"},
- {4, "iso-ir-100"},
- {4, "l1"},
- {4, "latin1"},
- {5, "ISO-8859-2"},
- {5, "ISO_8859-2:1987"},
- {5, "ISO_8859-2"},
- {5, "csISOLatin2"},
- {5, "iso-ir-101"},
- {5, "l2"},
- {5, "latin2"},
- {6, "ISO-8859-3"},
- {6, "ISO_8859-3:1988"},
- {6, "ISO_8859-3"},
- {6, "csISOLatin3"},
- {6, "iso-ir-109"},
- {6, "l3"},
- {6, "latin3"},
- {7, "ISO-8859-4"},
- {7, "ISO_8859-4:1988"},
- {7, "ISO_8859-4"},
- {7, "csISOLatin4"},
- {7, "iso-ir-110"},
- {7, "l4"},
- {7, "latin4"},
- {8, "ISO-8859-5"},
- {8, "ISO_8859-5:1988"},
- {8, "ISO_8859-5"},
- {8, "csISOLatinCyrillic"},
- {8, "cyrillic"},
- {8, "iso-ir-144"},
- {9, "ISO-8859-6"},
- {9, "ISO_8859-6:1987"},
- {9, "ASMO-708"},
- {9, "ECMA-114"},
- {9, "ISO_8859-6"},
- {9, "arabic"},
- {9, "csISOLatinArabic"},
- {9, "iso-ir-127"},
- {10, "ISO-8859-7"},
- {10, "ISO_8859-7:1987"},
- {10, "ECMA-118"},
- {10, "ELOT_928"},
- {10, "ISO_8859-7"},
- {10, "csISOLatinGreek"},
- {10, "greek"},
- {10, "greek8"},
- {10, "iso-ir-126"},
- {11, "ISO-8859-8"},
- {11, "ISO_8859-8:1988"},
- {11, "ISO_8859-8"},
- {11, "csISOLatinHebrew"},
- {11, "hebrew"},
- {11, "iso-ir-138"},
- {12, "ISO-8859-9"},
- {12, "ISO_8859-9:1989"},
- {12, "ISO_8859-9"},
- {12, "csISOLatin5"},
- {12, "iso-ir-148"},
- {12, "l5"},
- {12, "latin5"},
- {13, "ISO-8859-10"},
- {13, "ISO_8859-10:1992"},
- {13, "csISOLatin6"},
- {13, "iso-ir-157"},
- {13, "l6"},
- {13, "latin6"},
- {14, "ISO_6937-2-add"},
- {14, "csISOTextComm"},
- {14, "iso-ir-142"},
- {15, "JIS_X0201"},
- {15, "X0201"},
- {15, "csHalfWidthKatakana"},
- {16, "JIS_Encoding"},
- {16, "csJISEncoding"},
- {17, "Shift_JIS"},
- {17, "MS_Kanji"},
- {17, "csShiftJIS"},
- {18, "EUC-JP"},
- {18, "Extended_UNIX_Code_Packed_Format_for_Japanese"},
- {18, "csEUCPkdFmtJapanese"},
- {19, "Extended_UNIX_Code_Fixed_Width_for_Japanese"},
- {19, "csEUCFixWidJapanese"},
- {20, "BS_4730"},
- {20, "ISO646-GB"},
- {20, "csISO4UnitedKingdom"},
- {20, "gb"},
- {20, "iso-ir-4"},
- {20, "uk"},
- {21, "SEN_850200_C"},
- {21, "ISO646-SE2"},
- {21, "csISO11SwedishForNames"},
- {21, "iso-ir-11"},
- {21, "se2"},
- {22, "IT"},
- {22, "ISO646-IT"},
- {22, "csISO15Italian"},
- {22, "iso-ir-15"},
- {23, "ES"},
- {23, "ISO646-ES"},
- {23, "csISO17Spanish"},
- {23, "iso-ir-17"},
- {24, "DIN_66003"},
- {24, "ISO646-DE"},
- {24, "csISO21German"},
- {24, "de"},
- {24, "iso-ir-21"},
- {25, "NS_4551-1"},
- {25, "ISO646-NO"},
- {25, "csISO60DanishNorwegian"},
- {25, "csISO60Norwegian1"},
- {25, "iso-ir-60"},
- {25, "no"},
- {26, "NF_Z_62-010"},
- {26, "ISO646-FR"},
- {26, "csISO69French"},
- {26, "fr"},
- {26, "iso-ir-69"},
- {27, "ISO-10646-UTF-1"},
- {27, "csISO10646UTF1"},
- {28, "ISO_646.basic:1983"},
- {28, "csISO646basic1983"},
- {28, "ref"},
- {29, "INVARIANT"},
- {29, "csINVARIANT"},
- {30, "ISO_646.irv:1983"},
- {30, "csISO2IntlRefVersion"},
- {30, "irv"},
- {30, "iso-ir-2"},
- {31, "NATS-SEFI"},
- {31, "csNATSSEFI"},
- {31, "iso-ir-8-1"},
- {32, "NATS-SEFI-ADD"},
- {32, "csNATSSEFIADD"},
- {32, "iso-ir-8-2"},
- {35, "SEN_850200_B"},
- {35, "FI"},
- {35, "ISO646-FI"},
- {35, "ISO646-SE"},
- {35, "csISO10Swedish"},
- {35, "iso-ir-10"},
- {35, "se"},
- {36, "KS_C_5601-1987"},
- {36, "KSC_5601"},
- {36, "KS_C_5601-1989"},
- {36, "csKSC56011987"},
- {36, "iso-ir-149"},
- {36, "korean"},
- {37, "ISO-2022-KR"},
- {37, "csISO2022KR"},
- {38, "EUC-KR"},
- {38, "csEUCKR"},
- {39, "ISO-2022-JP"},
- {39, "csISO2022JP"},
- {40, "ISO-2022-JP-2"},
- {40, "csISO2022JP2"},
- {41, "JIS_C6220-1969-jp"},
- {41, "JIS_C6220-1969"},
- {41, "csISO13JISC6220jp"},
- {41, "iso-ir-13"},
- {41, "katakana"},
- {41, "x0201-7"},
- {42, "JIS_C6220-1969-ro"},
- {42, "ISO646-JP"},
- {42, "csISO14JISC6220ro"},
- {42, "iso-ir-14"},
- {42, "jp"},
- {43, "PT"},
- {43, "ISO646-PT"},
- {43, "csISO16Portuguese"},
- {43, "iso-ir-16"},
- {44, "greek7-old"},
- {44, "csISO18Greek7Old"},
- {44, "iso-ir-18"},
- {45, "latin-greek"},
- {45, "csISO19LatinGreek"},
- {45, "iso-ir-19"},
- {46, "NF_Z_62-010_(1973)"},
- {46, "ISO646-FR1"},
- {46, "csISO25French"},
- {46, "iso-ir-25"},
- {47, "Latin-greek-1"},
- {47, "csISO27LatinGreek1"},
- {47, "iso-ir-27"},
- {48, "ISO_5427"},
- {48, "csISO5427Cyrillic"},
- {48, "iso-ir-37"},
- {49, "JIS_C6226-1978"},
- {49, "csISO42JISC62261978"},
- {49, "iso-ir-42"},
- {50, "BS_viewdata"},
- {50, "csISO47BSViewdata"},
- {50, "iso-ir-47"},
- {51, "INIS"},
- {51, "csISO49INIS"},
- {51, "iso-ir-49"},
- {52, "INIS-8"},
- {52, "csISO50INIS8"},
- {52, "iso-ir-50"},
- {53, "INIS-cyrillic"},
- {53, "csISO51INISCyrillic"},
- {53, "iso-ir-51"},
- {54, "ISO_5427:1981"},
- {54, "ISO5427Cyrillic1981"},
- {54, "csISO54271981"},
- {54, "iso-ir-54"},
- {55, "ISO_5428:1980"},
- {55, "csISO5428Greek"},
- {55, "iso-ir-55"},
- {56, "GB_1988-80"},
- {56, "ISO646-CN"},
- {56, "cn"},
- {56, "csISO57GB1988"},
- {56, "iso-ir-57"},
- {57, "GB_2312-80"},
- {57, "chinese"},
- {57, "csISO58GB231280"},
- {57, "iso-ir-58"},
- {58, "NS_4551-2"},
- {58, "ISO646-NO2"},
- {58, "csISO61Norwegian2"},
- {58, "iso-ir-61"},
- {58, "no2"},
- {59, "videotex-suppl"},
- {59, "csISO70VideotexSupp1"},
- {59, "iso-ir-70"},
- {60, "PT2"},
- {60, "ISO646-PT2"},
- {60, "csISO84Portuguese2"},
- {60, "iso-ir-84"},
- {61, "ES2"},
- {61, "ISO646-ES2"},
- {61, "csISO85Spanish2"},
- {61, "iso-ir-85"},
- {62, "MSZ_7795.3"},
- {62, "ISO646-HU"},
- {62, "csISO86Hungarian"},
- {62, "hu"},
- {62, "iso-ir-86"},
- {63, "JIS_C6226-1983"},
- {63, "JIS_X0208-1983"},
- {63, "csISO87JISX0208"},
- {63, "iso-ir-87"},
- {63, "x0208"},
- {64, "greek7"},
- {64, "csISO88Greek7"},
- {64, "iso-ir-88"},
- {65, "ASMO_449"},
- {65, "ISO_9036"},
- {65, "arabic7"},
- {65, "csISO89ASMO449"},
- {65, "iso-ir-89"},
- {66, "iso-ir-90"},
- {66, "csISO90"},
- {67, "JIS_C6229-1984-a"},
- {67, "csISO91JISC62291984a"},
- {67, "iso-ir-91"},
- {67, "jp-ocr-a"},
- {68, "JIS_C6229-1984-b"},
- {68, "ISO646-JP-OCR-B"},
- {68, "csISO92JISC62991984b"},
- {68, "iso-ir-92"},
- {68, "jp-ocr-b"},
- {69, "JIS_C6229-1984-b-add"},
- {69, "csISO93JIS62291984badd"},
- {69, "iso-ir-93"},
- {69, "jp-ocr-b-add"},
- {70, "JIS_C6229-1984-hand"},
- {70, "csISO94JIS62291984hand"},
- {70, "iso-ir-94"},
- {70, "jp-ocr-hand"},
- {71, "JIS_C6229-1984-hand-add"},
- {71, "csISO95JIS62291984handadd"},
- {71, "iso-ir-95"},
- {71, "jp-ocr-hand-add"},
- {72, "JIS_C6229-1984-kana"},
- {72, "csISO96JISC62291984kana"},
- {72, "iso-ir-96"},
- {73, "ISO_2033-1983"},
- {73, "csISO2033"},
- {73, "e13b"},
- {73, "iso-ir-98"},
- {74, "ANSI_X3.110-1983"},
- {74, "CSA_T500-1983"},
- {74, "NAPLPS"},
- {74, "csISO99NAPLPS"},
- {74, "iso-ir-99"},
- {75, "T.61-7bit"},
- {75, "csISO102T617bit"},
- {75, "iso-ir-102"},
- {76, "T.61-8bit"},
- {76, "T.61"},
- {76, "csISO103T618bit"},
- {76, "iso-ir-103"},
- {77, "ECMA-cyrillic"},
- {77, "KOI8-E"},
- {77, "csISO111ECMACyrillic"},
- {77, "iso-ir-111"},
- {78, "CSA_Z243.4-1985-1"},
- {78, "ISO646-CA"},
- {78, "ca"},
- {78, "csISO121Canadian1"},
- {78, "csa7-1"},
- {78, "csa71"},
- {78, "iso-ir-121"},
- {79, "CSA_Z243.4-1985-2"},
- {79, "ISO646-CA2"},
- {79, "csISO122Canadian2"},
- {79, "csa7-2"},
- {79, "csa72"},
- {79, "iso-ir-122"},
- {80, "CSA_Z243.4-1985-gr"},
- {80, "csISO123CSAZ24341985gr"},
- {80, "iso-ir-123"},
- {81, "ISO-8859-6-E"},
- {81, "ISO_8859-6-E"},
- {81, "csISO88596E"},
- {82, "ISO-8859-6-I"},
- {82, "ISO_8859-6-I"},
- {82, "csISO88596I"},
- {83, "T.101-G2"},
- {83, "csISO128T101G2"},
- {83, "iso-ir-128"},
- {84, "ISO-8859-8-E"},
- {84, "ISO_8859-8-E"},
- {84, "csISO88598E"},
- {85, "ISO-8859-8-I"},
- {85, "ISO_8859-8-I"},
- {85, "csISO88598I"},
- {86, "CSN_369103"},
- {86, "csISO139CSN369103"},
- {86, "iso-ir-139"},
- {87, "JUS_I.B1.002"},
- {87, "ISO646-YU"},
- {87, "csISO141JUSIB1002"},
- {87, "iso-ir-141"},
- {87, "js"},
- {87, "yu"},
- {88, "IEC_P27-1"},
- {88, "csISO143IECP271"},
- {88, "iso-ir-143"},
- {89, "JUS_I.B1.003-serb"},
- {89, "csISO146Serbian"},
- {89, "iso-ir-146"},
- {89, "serbian"},
- {90, "JUS_I.B1.003-mac"},
- {90, "csISO147Macedonian"},
- {90, "iso-ir-147"},
- {90, "macedonian"},
- {91, "greek-ccitt"},
- {91, "csISO150"},
- {91, "csISO150GreekCCITT"},
- {91, "iso-ir-150"},
- {92, "NC_NC00-10:81"},
- {92, "ISO646-CU"},
- {92, "csISO151Cuba"},
- {92, "cuba"},
- {92, "iso-ir-151"},
- {93, "ISO_6937-2-25"},
- {93, "csISO6937Add"},
- {93, "iso-ir-152"},
- {94, "GOST_19768-74"},
- {94, "ST_SEV_358-88"},
- {94, "csISO153GOST1976874"},
- {94, "iso-ir-153"},
- {95, "ISO_8859-supp"},
- {95, "csISO8859Supp"},
- {95, "iso-ir-154"},
- {95, "latin1-2-5"},
- {96, "ISO_10367-box"},
- {96, "csISO10367Box"},
- {96, "iso-ir-155"},
- {97, "latin-lap"},
- {97, "csISO158Lap"},
- {97, "iso-ir-158"},
- {97, "lap"},
- {98, "JIS_X0212-1990"},
- {98, "csISO159JISX02121990"},
- {98, "iso-ir-159"},
- {98, "x0212"},
- {99, "DS_2089"},
- {99, "DS2089"},
- {99, "ISO646-DK"},
- {99, "csISO646Danish"},
- {99, "dk"},
- {100, "us-dk"},
- {100, "csUSDK"},
- {101, "dk-us"},
- {101, "csDKUS"},
- {102, "KSC5636"},
- {102, "ISO646-KR"},
- {102, "csKSC5636"},
- {103, "UNICODE-1-1-UTF-7"},
- {103, "csUnicode11UTF7"},
- {104, "ISO-2022-CN"},
- {104, "csISO2022CN"},
- {105, "ISO-2022-CN-EXT"},
- {105, "csISO2022CNEXT"},
- {106, "UTF-8"},
- {106, "csUTF8"},
- {109, "ISO-8859-13"},
- {109, "csISO885913"},
- {110, "ISO-8859-14"},
- {110, "ISO_8859-14"},
- {110, "ISO_8859-14:1998"},
- {110, "csISO885914"},
- {110, "iso-celtic"},
- {110, "iso-ir-199"},
- {110, "l8"},
- {110, "latin8"},
- {111, "ISO-8859-15"},
- {111, "ISO_8859-15"},
- {111, "Latin-9"},
- {111, "csISO885915"},
- {112, "ISO-8859-16"},
- {112, "ISO_8859-16"},
- {112, "ISO_8859-16:2001"},
- {112, "csISO885916"},
- {112, "iso-ir-226"},
- {112, "l10"},
- {112, "latin10"},
- {113, "GBK"},
- {113, "CP936"},
- {113, "MS936"},
- {113, "csGBK"},
- {113, "windows-936"},
- {114, "GB18030"},
- {114, "csGB18030"},
- {115, "OSD_EBCDIC_DF04_15"},
- {115, "csOSDEBCDICDF0415"},
- {116, "OSD_EBCDIC_DF03_IRV"},
- {116, "csOSDEBCDICDF03IRV"},
- {117, "OSD_EBCDIC_DF04_1"},
- {117, "csOSDEBCDICDF041"},
- {118, "ISO-11548-1"},
- {118, "ISO_11548-1"},
- {118, "ISO_TR_11548-1"},
- {118, "csISO115481"},
- {119, "KZ-1048"},
- {119, "RK1048"},
- {119, "STRK1048-2002"},
- {119, "csKZ1048"},
- {1000, "ISO-10646-UCS-2"},
- {1000, "csUnicode"},
- {1001, "ISO-10646-UCS-4"},
- {1001, "csUCS4"},
- {1002, "ISO-10646-UCS-Basic"},
- {1002, "csUnicodeASCII"},
- {1003, "ISO-10646-Unicode-Latin1"},
- {1003, "ISO-10646"},
- {1003, "csUnicodeLatin1"},
- {1004, "ISO-10646-J-1"},
- {1004, "csUnicodeJapanese"},
- {1005, "ISO-Unicode-IBM-1261"},
- {1005, "csUnicodeIBM1261"},
- {1006, "ISO-Unicode-IBM-1268"},
- {1006, "csUnicodeIBM1268"},
- {1007, "ISO-Unicode-IBM-1276"},
- {1007, "csUnicodeIBM1276"},
- {1008, "ISO-Unicode-IBM-1264"},
- {1008, "csUnicodeIBM1264"},
- {1009, "ISO-Unicode-IBM-1265"},
- {1009, "csUnicodeIBM1265"},
- {1010, "UNICODE-1-1"},
- {1010, "csUnicode11"},
- {1011, "SCSU"},
- {1011, "csSCSU"},
- {1012, "UTF-7"},
- {1012, "csUTF7"},
- {1013, "UTF-16BE"},
- {1013, "csUTF16BE"},
- {1014, "UTF-16LE"},
- {1014, "csUTF16LE"},
- {1015, "UTF-16"},
- {1015, "csUTF16"},
- {1016, "CESU-8"},
- {1016, "csCESU-8"},
- {1016, "csCESU8"},
- {1017, "UTF-32"},
- {1017, "csUTF32"},
- {1018, "UTF-32BE"},
- {1018, "csUTF32BE"},
- {1019, "UTF-32LE"},
- {1019, "csUTF32LE"},
- {1020, "BOCU-1"},
- {1020, "csBOCU-1"},
- {1020, "csBOCU1"},
- {1021, "UTF-7-IMAP"},
- {1021, "csUTF7IMAP"},
- {2000, "ISO-8859-1-Windows-3.0-Latin-1"},
- {2000, "csWindows30Latin1"},
- {2001, "ISO-8859-1-Windows-3.1-Latin-1"},
- {2001, "csWindows31Latin1"},
- {2002, "ISO-8859-2-Windows-Latin-2"},
- {2002, "csWindows31Latin2"},
- {2003, "ISO-8859-9-Windows-Latin-5"},
- {2003, "csWindows31Latin5"},
- {2004, "hp-roman8"},
- {2004, "csHPRoman8"},
- {2004, "r8"},
- {2004, "roman8"},
- {2005, "Adobe-Standard-Encoding"},
- {2005, "csAdobeStandardEncoding"},
- {2006, "Ventura-US"},
- {2006, "csVenturaUS"},
- {2007, "Ventura-International"},
- {2007, "csVenturaInternational"},
- {2008, "DEC-MCS"},
- {2008, "csDECMCS"},
- {2008, "dec"},
- {2009, "IBM850"},
- {2009, "850"},
- {2009, "cp850"},
- {2009, "csPC850Multilingual"},
- {2010, "IBM852"},
- {2010, "852"},
- {2010, "cp852"},
- {2010, "csPCp852"},
- {2011, "IBM437"},
- {2011, "437"},
- {2011, "cp437"},
- {2011, "csPC8CodePage437"},
- {2012, "PC8-Danish-Norwegian"},
- {2012, "csPC8DanishNorwegian"},
- {2013, "IBM862"},
- {2013, "862"},
- {2013, "cp862"},
- {2013, "csPC862LatinHebrew"},
- {2014, "PC8-Turkish"},
- {2014, "csPC8Turkish"},
- {2015, "IBM-Symbols"},
- {2015, "csIBMSymbols"},
- {2016, "IBM-Thai"},
- {2016, "csIBMThai"},
- {2017, "HP-Legal"},
- {2017, "csHPLegal"},
- {2018, "HP-Pi-font"},
- {2018, "csHPPiFont"},
- {2019, "HP-Math8"},
- {2019, "csHPMath8"},
- {2020, "Adobe-Symbol-Encoding"},
- {2020, "csHPPSMath"},
- {2021, "HP-DeskTop"},
- {2021, "csHPDesktop"},
- {2022, "Ventura-Math"},
- {2022, "csVenturaMath"},
- {2023, "Microsoft-Publishing"},
- {2023, "csMicrosoftPublishing"},
- {2024, "Windows-31J"},
- {2024, "csWindows31J"},
- {2025, "GB2312"},
- {2025, "csGB2312"},
- {2026, "Big5"},
- {2026, "csBig5"},
- {2027, "macintosh"},
- {2027, "csMacintosh"},
- {2027, "mac"},
- {2028, "IBM037"},
- {2028, "cp037"},
- {2028, "csIBM037"},
- {2028, "ebcdic-cp-ca"},
- {2028, "ebcdic-cp-nl"},
- {2028, "ebcdic-cp-us"},
- {2028, "ebcdic-cp-wt"},
- {2029, "IBM038"},
- {2029, "EBCDIC-INT"},
- {2029, "cp038"},
- {2029, "csIBM038"},
- {2030, "IBM273"},
- {2030, "CP273"},
- {2030, "csIBM273"},
- {2031, "IBM274"},
- {2031, "CP274"},
- {2031, "EBCDIC-BE"},
- {2031, "csIBM274"},
- {2032, "IBM275"},
- {2032, "EBCDIC-BR"},
- {2032, "cp275"},
- {2032, "csIBM275"},
- {2033, "IBM277"},
- {2033, "EBCDIC-CP-DK"},
- {2033, "EBCDIC-CP-NO"},
- {2033, "csIBM277"},
- {2034, "IBM278"},
- {2034, "CP278"},
- {2034, "csIBM278"},
- {2034, "ebcdic-cp-fi"},
- {2034, "ebcdic-cp-se"},
- {2035, "IBM280"},
- {2035, "CP280"},
- {2035, "csIBM280"},
- {2035, "ebcdic-cp-it"},
- {2036, "IBM281"},
- {2036, "EBCDIC-JP-E"},
- {2036, "cp281"},
- {2036, "csIBM281"},
- {2037, "IBM284"},
- {2037, "CP284"},
- {2037, "csIBM284"},
- {2037, "ebcdic-cp-es"},
- {2038, "IBM285"},
- {2038, "CP285"},
- {2038, "csIBM285"},
- {2038, "ebcdic-cp-gb"},
- {2039, "IBM290"},
- {2039, "EBCDIC-JP-kana"},
- {2039, "cp290"},
- {2039, "csIBM290"},
- {2040, "IBM297"},
- {2040, "cp297"},
- {2040, "csIBM297"},
- {2040, "ebcdic-cp-fr"},
- {2041, "IBM420"},
- {2041, "cp420"},
- {2041, "csIBM420"},
- {2041, "ebcdic-cp-ar1"},
- {2042, "IBM423"},
- {2042, "cp423"},
- {2042, "csIBM423"},
- {2042, "ebcdic-cp-gr"},
- {2043, "IBM424"},
- {2043, "cp424"},
- {2043, "csIBM424"},
- {2043, "ebcdic-cp-he"},
- {2044, "IBM500"},
- {2044, "CP500"},
- {2044, "csIBM500"},
- {2044, "ebcdic-cp-be"},
- {2044, "ebcdic-cp-ch"},
- {2045, "IBM851"},
- {2045, "851"},
- {2045, "cp851"},
- {2045, "csIBM851"},
- {2046, "IBM855"},
- {2046, "855"},
- {2046, "cp855"},
- {2046, "csIBM855"},
- {2047, "IBM857"},
- {2047, "857"},
- {2047, "cp857"},
- {2047, "csIBM857"},
- {2048, "IBM860"},
- {2048, "860"},
- {2048, "cp860"},
- {2048, "csIBM860"},
- {2049, "IBM861"},
- {2049, "861"},
- {2049, "cp-is"},
- {2049, "cp861"},
- {2049, "csIBM861"},
- {2050, "IBM863"},
- {2050, "863"},
- {2050, "cp863"},
- {2050, "csIBM863"},
- {2051, "IBM864"},
- {2051, "cp864"},
- {2051, "csIBM864"},
- {2052, "IBM865"},
- {2052, "865"},
- {2052, "cp865"},
- {2052, "csIBM865"},
- {2053, "IBM868"},
- {2053, "CP868"},
- {2053, "cp-ar"},
- {2053, "csIBM868"},
- {2054, "IBM869"},
- {2054, "869"},
- {2054, "cp-gr"},
- {2054, "cp869"},
- {2054, "csIBM869"},
- {2055, "IBM870"},
- {2055, "CP870"},
- {2055, "csIBM870"},
- {2055, "ebcdic-cp-roece"},
- {2055, "ebcdic-cp-yu"},
- {2056, "IBM871"},
- {2056, "CP871"},
- {2056, "csIBM871"},
- {2056, "ebcdic-cp-is"},
- {2057, "IBM880"},
- {2057, "EBCDIC-Cyrillic"},
- {2057, "cp880"},
- {2057, "csIBM880"},
- {2058, "IBM891"},
- {2058, "cp891"},
- {2058, "csIBM891"},
- {2059, "IBM903"},
- {2059, "cp903"},
- {2059, "csIBM903"},
- {2060, "IBM904"},
- {2060, "904"},
- {2060, "cp904"},
- {2060, "csIBBM904"},
- {2061, "IBM905"},
- {2061, "CP905"},
- {2061, "csIBM905"},
- {2061, "ebcdic-cp-tr"},
- {2062, "IBM918"},
- {2062, "CP918"},
- {2062, "csIBM918"},
- {2062, "ebcdic-cp-ar2"},
- {2063, "IBM1026"},
- {2063, "CP1026"},
- {2063, "csIBM1026"},
- {2064, "EBCDIC-AT-DE"},
- {2064, "csIBMEBCDICATDE"},
- {2065, "EBCDIC-AT-DE-A"},
- {2065, "csEBCDICATDEA"},
- {2066, "EBCDIC-CA-FR"},
- {2066, "csEBCDICCAFR"},
- {2067, "EBCDIC-DK-NO"},
- {2067, "csEBCDICDKNO"},
- {2068, "EBCDIC-DK-NO-A"},
- {2068, "csEBCDICDKNOA"},
- {2069, "EBCDIC-FI-SE"},
- {2069, "csEBCDICFISE"},
- {2070, "EBCDIC-FI-SE-A"},
- {2070, "csEBCDICFISEA"},
- {2071, "EBCDIC-FR"},
- {2071, "csEBCDICFR"},
- {2072, "EBCDIC-IT"},
- {2072, "csEBCDICIT"},
- {2073, "EBCDIC-PT"},
- {2073, "csEBCDICPT"},
- {2074, "EBCDIC-ES"},
- {2074, "csEBCDICES"},
- {2075, "EBCDIC-ES-A"},
- {2075, "csEBCDICESA"},
- {2076, "EBCDIC-ES-S"},
- {2076, "csEBCDICESS"},
- {2077, "EBCDIC-UK"},
- {2077, "csEBCDICUK"},
- {2078, "EBCDIC-US"},
- {2078, "csEBCDICUS"},
- {2079, "UNKNOWN-8BIT"},
- {2079, "csUnknown8BiT"},
- {2080, "MNEMONIC"},
- {2080, "csMnemonic"},
- {2081, "MNEM"},
- {2081, "csMnem"},
- {2082, "VISCII"},
- {2082, "csVISCII"},
- {2083, "VIQR"},
- {2083, "csVIQR"},
- {2084, "KOI8-R"},
- {2084, "csKOI8R"},
- {2085, "HZ-GB-2312"},
- {2086, "IBM866"},
- {2086, "866"},
- {2086, "cp866"},
- {2086, "csIBM866"},
- {2087, "IBM775"},
- {2087, "cp775"},
- {2087, "csPC775Baltic"},
- {2088, "KOI8-U"},
- {2088, "csKOI8U"},
- {2089, "IBM00858"},
- {2089, "CCSID00858"},
- {2089, "CP00858"},
- {2089, "PC-Multilingual-850+euro"},
- {2089, "csIBM00858"},
- {2090, "IBM00924"},
- {2090, "CCSID00924"},
- {2090, "CP00924"},
- {2090, "csIBM00924"},
- {2090, "ebcdic-Latin9--euro"},
- {2091, "IBM01140"},
- {2091, "CCSID01140"},
- {2091, "CP01140"},
- {2091, "csIBM01140"},
- {2091, "ebcdic-us-37+euro"},
- {2092, "IBM01141"},
- {2092, "CCSID01141"},
- {2092, "CP01141"},
- {2092, "csIBM01141"},
- {2092, "ebcdic-de-273+euro"},
- {2093, "IBM01142"},
- {2093, "CCSID01142"},
- {2093, "CP01142"},
- {2093, "csIBM01142"},
- {2093, "ebcdic-dk-277+euro"},
- {2093, "ebcdic-no-277+euro"},
- {2094, "IBM01143"},
- {2094, "CCSID01143"},
- {2094, "CP01143"},
- {2094, "csIBM01143"},
- {2094, "ebcdic-fi-278+euro"},
- {2094, "ebcdic-se-278+euro"},
- {2095, "IBM01144"},
- {2095, "CCSID01144"},
- {2095, "CP01144"},
- {2095, "csIBM01144"},
- {2095, "ebcdic-it-280+euro"},
- {2096, "IBM01145"},
- {2096, "CCSID01145"},
- {2096, "CP01145"},
- {2096, "csIBM01145"},
- {2096, "ebcdic-es-284+euro"},
- {2097, "IBM01146"},
- {2097, "CCSID01146"},
- {2097, "CP01146"},
- {2097, "csIBM01146"},
- {2097, "ebcdic-gb-285+euro"},
- {2098, "IBM01147"},
- {2098, "CCSID01147"},
- {2098, "CP01147"},
- {2098, "csIBM01147"},
- {2098, "ebcdic-fr-297+euro"},
- {2099, "IBM01148"},
- {2099, "CCSID01148"},
- {2099, "CP01148"},
- {2099, "csIBM01148"},
- {2099, "ebcdic-international-500+euro"},
- {2100, "IBM01149"},
- {2100, "CCSID01149"},
- {2100, "CP01149"},
- {2100, "csIBM01149"},
- {2100, "ebcdic-is-871+euro"},
- {2101, "Big5-HKSCS"},
- {2101, "csBig5HKSCS"},
- {2102, "IBM1047"},
- {2102, "IBM-1047"},
- {2102, "csIBM1047"},
- {2103, "PTCP154"},
- {2103, "CP154"},
- {2103, "Cyrillic-Asian"},
- {2103, "PT154"},
- {2103, "csPTCP154"},
- {2104, "Amiga-1251"},
- {2104, "Ami-1251"},
- {2104, "Ami1251"},
- {2104, "Amiga1251"},
- {2104, "csAmiga1251"},
- {2105, "KOI7-switched"},
- {2105, "csKOI7switched"},
- {2106, "BRF"},
- {2106, "csBRF"},
- {2107, "TSCII"},
- {2107, "csTSCII"},
- {2108, "CP51932"},
- {2108, "csCP51932"},
- {2109, "windows-874"},
- {2109, "cswindows874"},
- {2250, "windows-1250"},
- {2250, "cswindows1250"},
- {2251, "windows-1251"},
- {2251, "cswindows1251"},
- {2252, "windows-1252"},
- {2252, "cswindows1252"},
- {2253, "windows-1253"},
- {2253, "cswindows1253"},
- {2254, "windows-1254"},
- {2254, "cswindows1254"},
- {2255, "windows-1255"},
- {2255, "cswindows1255"},
- {2256, "windows-1256"},
- {2256, "cswindows1256"},
- {2257, "windows-1257"},
- {2257, "cswindows1257"},
- {2258, "windows-1258"},
- {2258, "cswindows1258"},
- {2259, "TIS-620"},
- {2259, "ISO-8859-11"},
- {2259, "csTIS620"},
- {2260, "CP50220"},
- {2260, "csCP50220"},
- {9999, nullptr} // sentinel
+ {1, "", 0},
+ {2, "", 0},
+ {3, "ANSI_X3.4-1968", 14},
+ {3, "ANSI_X3.4-1986", 14},
+ {3, "IBM367", 6},
+ {3, "ISO646-US", 9},
+ {3, "ISO_646.irv:1991", 16},
+ {3, "cp367", 5},
+ {3, "csASCII", 7},
+ {3, "iso-ir-6", 8},
+ {3, "us", 2},
+ {4, "ISO-8859-1", 10},
+ {4, "ISO_8859-1:1987", 15},
+ {4, "CP819", 5},
+ {4, "IBM819", 6},
+ {4, "ISO_8859-1", 10},
+ {4, "csISOLatin1", 11},
+ {4, "iso-ir-100", 10},
+ {4, "l1", 2},
+ {4, "latin1", 6},
+ {5, "ISO-8859-2", 10},
+ {5, "ISO_8859-2:1987", 15},
+ {5, "ISO_8859-2", 10},
+ {5, "csISOLatin2", 11},
+ {5, "iso-ir-101", 10},
+ {5, "l2", 2},
+ {5, "latin2", 6},
+ {6, "ISO-8859-3", 10},
+ {6, "ISO_8859-3:1988", 15},
+ {6, "ISO_8859-3", 10},
+ {6, "csISOLatin3", 11},
+ {6, "iso-ir-109", 10},
+ {6, "l3", 2},
+ {6, "latin3", 6},
+ {7, "ISO-8859-4", 10},
+ {7, "ISO_8859-4:1988", 15},
+ {7, "ISO_8859-4", 10},
+ {7, "csISOLatin4", 11},
+ {7, "iso-ir-110", 10},
+ {7, "l4", 2},
+ {7, "latin4", 6},
+ {8, "ISO-8859-5", 10},
+ {8, "ISO_8859-5:1988", 15},
+ {8, "ISO_8859-5", 10},
+ {8, "csISOLatinCyrillic", 18},
+ {8, "cyrillic", 8},
+ {8, "iso-ir-144", 10},
+ {9, "ISO-8859-6", 10},
+ {9, "ISO_8859-6:1987", 15},
+ {9, "ASMO-708", 8},
+ {9, "ECMA-114", 8},
+ {9, "ISO_8859-6", 10},
+ {9, "arabic", 6},
+ {9, "csISOLatinArabic", 16},
+ {9, "iso-ir-127", 10},
+ {10, "ISO-8859-7", 10},
+ {10, "ISO_8859-7:1987", 15},
+ {10, "ECMA-118", 8},
+ {10, "ELOT_928", 8},
+ {10, "ISO_8859-7", 10},
+ {10, "csISOLatinGreek", 15},
+ {10, "greek", 5},
+ {10, "greek8", 6},
+ {10, "iso-ir-126", 10},
+ {11, "ISO-8859-8", 10},
+ {11, "ISO_8859-8:1988", 15},
+ {11, "ISO_8859-8", 10},
+ {11, "csISOLatinHebrew", 16},
+ {11, "hebrew", 6},
+ {11, "iso-ir-138", 10},
+ {12, "ISO-8859-9", 10},
+ {12, "ISO_8859-9:1989", 15},
+ {12, "ISO_8859-9", 10},
+ {12, "csISOLatin5", 11},
+ {12, "iso-ir-148", 10},
+ {12, "l5", 2},
+ {12, "latin5", 6},
+ {13, "ISO-8859-10", 11},
+ {13, "ISO_8859-10:1992", 16},
+ {13, "csISOLatin6", 11},
+ {13, "iso-ir-157", 10},
+ {13, "l6", 2},
+ {13, "latin6", 6},
+ {14, "ISO_6937-2-add", 14},
+ {14, "csISOTextComm", 13},
+ {14, "iso-ir-142", 10},
+ {15, "JIS_X0201", 9},
+ {15, "X0201", 5},
+ {15, "csHalfWidthKatakana", 19},
+ {16, "JIS_Encoding", 12},
+ {16, "csJISEncoding", 13},
+ {17, "Shift_JIS", 9},
+ {17, "MS_Kanji", 8},
+ {17, "csShiftJIS", 10},
+ {18, "EUC-JP", 6},
+ {18, "Extended_UNIX_Code_Packed_Format_for_Japanese", 45},
+ {18, "csEUCPkdFmtJapanese", 19},
+ {19, "Extended_UNIX_Code_Fixed_Width_for_Japanese", 43},
+ {19, "csEUCFixWidJapanese", 19},
+ {20, "BS_4730", 7},
+ {20, "ISO646-GB", 9},
+ {20, "csISO4UnitedKingdom", 19},
+ {20, "gb", 2},
+ {20, "iso-ir-4", 8},
+ {20, "uk", 2},
+ {21, "SEN_850200_C", 12},
+ {21, "ISO646-SE2", 10},
+ {21, "csISO11SwedishForNames", 22},
+ {21, "iso-ir-11", 9},
+ {21, "se2", 3},
+ {22, "IT", 2},
+ {22, "ISO646-IT", 9},
+ {22, "csISO15Italian", 14},
+ {22, "iso-ir-15", 9},
+ {23, "ES", 2},
+ {23, "ISO646-ES", 9},
+ {23, "csISO17Spanish", 14},
+ {23, "iso-ir-17", 9},
+ {24, "DIN_66003", 9},
+ {24, "ISO646-DE", 9},
+ {24, "csISO21German", 13},
+ {24, "de", 2},
+ {24, "iso-ir-21", 9},
+ {25, "NS_4551-1", 9},
+ {25, "ISO646-NO", 9},
+ {25, "csISO60DanishNorwegian", 22},
+ {25, "csISO60Norwegian1", 17},
+ {25, "iso-ir-60", 9},
+ {25, "no", 2},
+ {26, "NF_Z_62-010", 11},
+ {26, "ISO646-FR", 9},
+ {26, "csISO69French", 13},
+ {26, "fr", 2},
+ {26, "iso-ir-69", 9},
+ {27, "ISO-10646-UTF-1", 15},
+ {27, "csISO10646UTF1", 14},
+ {28, "ISO_646.basic:1983", 18},
+ {28, "csISO646basic1983", 17},
+ {28, "ref", 3},
+ {29, "INVARIANT", 9},
+ {29, "csINVARIANT", 11},
+ {30, "ISO_646.irv:1983", 16},
+ {30, "csISO2IntlRefVersion", 20},
+ {30, "irv", 3},
+ {30, "iso-ir-2", 8},
+ {31, "NATS-SEFI", 9},
+ {31, "csNATSSEFI", 10},
+ {31, "iso-ir-8-1", 10},
+ {32, "NATS-SEFI-ADD", 13},
+ {32, "csNATSSEFIADD", 13},
+ {32, "iso-ir-8-2", 10},
+ {35, "SEN_850200_B", 12},
+ {35, "FI", 2},
+ {35, "ISO646-FI", 9},
+ {35, "ISO646-SE", 9},
+ {35, "csISO10Swedish", 14},
+ {35, "iso-ir-10", 9},
+ {35, "se", 2},
+ {36, "KS_C_5601-1987", 14},
+ {36, "KSC_5601", 8},
+ {36, "KS_C_5601-1989", 14},
+ {36, "csKSC56011987", 13},
+ {36, "iso-ir-149", 10},
+ {36, "korean", 6},
+ {37, "ISO-2022-KR", 11},
+ {37, "csISO2022KR", 11},
+ {38, "EUC-KR", 6},
+ {38, "csEUCKR", 7},
+ {39, "ISO-2022-JP", 11},
+ {39, "csISO2022JP", 11},
+ {40, "ISO-2022-JP-2", 13},
+ {40, "csISO2022JP2", 12},
+ {41, "JIS_C6220-1969-jp", 17},
+ {41, "JIS_C6220-1969", 14},
+ {41, "csISO13JISC6220jp", 17},
+ {41, "iso-ir-13", 9},
+ {41, "katakana", 8},
+ {41, "x0201-7", 7},
+ {42, "JIS_C6220-1969-ro", 17},
+ {42, "ISO646-JP", 9},
+ {42, "csISO14JISC6220ro", 17},
+ {42, "iso-ir-14", 9},
+ {42, "jp", 2},
+ {43, "PT", 2},
+ {43, "ISO646-PT", 9},
+ {43, "csISO16Portuguese", 17},
+ {43, "iso-ir-16", 9},
+ {44, "greek7-old", 10},
+ {44, "csISO18Greek7Old", 16},
+ {44, "iso-ir-18", 9},
+ {45, "latin-greek", 11},
+ {45, "csISO19LatinGreek", 17},
+ {45, "iso-ir-19", 9},
+ {46, "NF_Z_62-010_(1973)", 18},
+ {46, "ISO646-FR1", 10},
+ {46, "csISO25French", 13},
+ {46, "iso-ir-25", 9},
+ {47, "Latin-greek-1", 13},
+ {47, "csISO27LatinGreek1", 18},
+ {47, "iso-ir-27", 9},
+ {48, "ISO_5427", 8},
+ {48, "csISO5427Cyrillic", 17},
+ {48, "iso-ir-37", 9},
+ {49, "JIS_C6226-1978", 14},
+ {49, "csISO42JISC62261978", 19},
+ {49, "iso-ir-42", 9},
+ {50, "BS_viewdata", 11},
+ {50, "csISO47BSViewdata", 17},
+ {50, "iso-ir-47", 9},
+ {51, "INIS", 4},
+ {51, "csISO49INIS", 11},
+ {51, "iso-ir-49", 9},
+ {52, "INIS-8", 6},
+ {52, "csISO50INIS8", 12},
+ {52, "iso-ir-50", 9},
+ {53, "INIS-cyrillic", 13},
+ {53, "csISO51INISCyrillic", 19},
+ {53, "iso-ir-51", 9},
+ {54, "ISO_5427:1981", 13},
+ {54, "ISO5427Cyrillic1981", 19},
+ {54, "csISO54271981", 13},
+ {54, "iso-ir-54", 9},
+ {55, "ISO_5428:1980", 13},
+ {55, "csISO5428Greek", 14},
+ {55, "iso-ir-55", 9},
+ {56, "GB_1988-80", 10},
+ {56, "ISO646-CN", 9},
+ {56, "cn", 2},
+ {56, "csISO57GB1988", 13},
+ {56, "iso-ir-57", 9},
+ {57, "GB_2312-80", 10},
+ {57, "chinese", 7},
+ {57, "csISO58GB231280", 15},
+ {57, "iso-ir-58", 9},
+ {58, "NS_4551-2", 9},
+ {58, "ISO646-NO2", 10},
+ {58, "csISO61Norwegian2", 17},
+ {58, "iso-ir-61", 9},
+ {58, "no2", 3},
+ {59, "videotex-suppl", 14},
+ {59, "csISO70VideotexSupp1", 20},
+ {59, "iso-ir-70", 9},
+ {60, "PT2", 3},
+ {60, "ISO646-PT2", 10},
+ {60, "csISO84Portuguese2", 18},
+ {60, "iso-ir-84", 9},
+ {61, "ES2", 3},
+ {61, "ISO646-ES2", 10},
+ {61, "csISO85Spanish2", 15},
+ {61, "iso-ir-85", 9},
+ {62, "MSZ_7795.3", 10},
+ {62, "ISO646-HU", 9},
+ {62, "csISO86Hungarian", 16},
+ {62, "hu", 2},
+ {62, "iso-ir-86", 9},
+ {63, "JIS_C6226-1983", 14},
+ {63, "JIS_X0208-1983", 14},
+ {63, "csISO87JISX0208", 15},
+ {63, "iso-ir-87", 9},
+ {63, "x0208", 5},
+ {64, "greek7", 6},
+ {64, "csISO88Greek7", 13},
+ {64, "iso-ir-88", 9},
+ {65, "ASMO_449", 8},
+ {65, "ISO_9036", 8},
+ {65, "arabic7", 7},
+ {65, "csISO89ASMO449", 14},
+ {65, "iso-ir-89", 9},
+ {66, "iso-ir-90", 9},
+ {66, "csISO90", 7},
+ {67, "JIS_C6229-1984-a", 16},
+ {67, "csISO91JISC62291984a", 20},
+ {67, "iso-ir-91", 9},
+ {67, "jp-ocr-a", 8},
+ {68, "JIS_C6229-1984-b", 16},
+ {68, "ISO646-JP-OCR-B", 15},
+ {68, "csISO92JISC62991984b", 20},
+ {68, "iso-ir-92", 9},
+ {68, "jp-ocr-b", 8},
+ {69, "JIS_C6229-1984-b-add", 20},
+ {69, "csISO93JIS62291984badd", 22},
+ {69, "iso-ir-93", 9},
+ {69, "jp-ocr-b-add", 12},
+ {70, "JIS_C6229-1984-hand", 19},
+ {70, "csISO94JIS62291984hand", 22},
+ {70, "iso-ir-94", 9},
+ {70, "jp-ocr-hand", 11},
+ {71, "JIS_C6229-1984-hand-add", 23},
+ {71, "csISO95JIS62291984handadd", 25},
+ {71, "iso-ir-95", 9},
+ {71, "jp-ocr-hand-add", 15},
+ {72, "JIS_C6229-1984-kana", 19},
+ {72, "csISO96JISC62291984kana", 23},
+ {72, "iso-ir-96", 9},
+ {73, "ISO_2033-1983", 13},
+ {73, "csISO2033", 9},
+ {73, "e13b", 4},
+ {73, "iso-ir-98", 9},
+ {74, "ANSI_X3.110-1983", 16},
+ {74, "CSA_T500-1983", 13},
+ {74, "NAPLPS", 6},
+ {74, "csISO99NAPLPS", 13},
+ {74, "iso-ir-99", 9},
+ {75, "T.61-7bit", 9},
+ {75, "csISO102T617bit", 15},
+ {75, "iso-ir-102", 10},
+ {76, "T.61-8bit", 9},
+ {76, "T.61", 4},
+ {76, "csISO103T618bit", 15},
+ {76, "iso-ir-103", 10},
+ {77, "ECMA-cyrillic", 13},
+ {77, "KOI8-E", 6},
+ {77, "csISO111ECMACyrillic", 20},
+ {77, "iso-ir-111", 10},
+ {78, "CSA_Z243.4-1985-1", 17},
+ {78, "ISO646-CA", 9},
+ {78, "ca", 2},
+ {78, "csISO121Canadian1", 17},
+ {78, "csa7-1", 6},
+ {78, "csa71", 5},
+ {78, "iso-ir-121", 10},
+ {79, "CSA_Z243.4-1985-2", 17},
+ {79, "ISO646-CA2", 10},
+ {79, "csISO122Canadian2", 17},
+ {79, "csa7-2", 6},
+ {79, "csa72", 5},
+ {79, "iso-ir-122", 10},
+ {80, "CSA_Z243.4-1985-gr", 18},
+ {80, "csISO123CSAZ24341985gr", 22},
+ {80, "iso-ir-123", 10},
+ {81, "ISO-8859-6-E", 12},
+ {81, "ISO_8859-6-E", 12},
+ {81, "csISO88596E", 11},
+ {82, "ISO-8859-6-I", 12},
+ {82, "ISO_8859-6-I", 12},
+ {82, "csISO88596I", 11},
+ {83, "T.101-G2", 8},
+ {83, "csISO128T101G2", 14},
+ {83, "iso-ir-128", 10},
+ {84, "ISO-8859-8-E", 12},
+ {84, "ISO_8859-8-E", 12},
+ {84, "csISO88598E", 11},
+ {85, "ISO-8859-8-I", 12},
+ {85, "ISO_8859-8-I", 12},
+ {85, "csISO88598I", 11},
+ {86, "CSN_369103", 10},
+ {86, "csISO139CSN369103", 17},
+ {86, "iso-ir-139", 10},
+ {87, "JUS_I.B1.002", 12},
+ {87, "ISO646-YU", 9},
+ {87, "csISO141JUSIB1002", 17},
+ {87, "iso-ir-141", 10},
+ {87, "js", 2},
+ {87, "yu", 2},
+ {88, "IEC_P27-1", 9},
+ {88, "csISO143IECP271", 15},
+ {88, "iso-ir-143", 10},
+ {89, "JUS_I.B1.003-serb", 17},
+ {89, "csISO146Serbian", 15},
+ {89, "iso-ir-146", 10},
+ {89, "serbian", 7},
+ {90, "JUS_I.B1.003-mac", 16},
+ {90, "csISO147Macedonian", 18},
+ {90, "iso-ir-147", 10},
+ {90, "macedonian", 10},
+ {91, "greek-ccitt", 11},
+ {91, "csISO150", 8},
+ {91, "csISO150GreekCCITT", 18},
+ {91, "iso-ir-150", 10},
+ {92, "NC_NC00-10:81", 13},
+ {92, "ISO646-CU", 9},
+ {92, "csISO151Cuba", 12},
+ {92, "cuba", 4},
+ {92, "iso-ir-151", 10},
+ {93, "ISO_6937-2-25", 13},
+ {93, "csISO6937Add", 12},
+ {93, "iso-ir-152", 10},
+ {94, "GOST_19768-74", 13},
+ {94, "ST_SEV_358-88", 13},
+ {94, "csISO153GOST1976874", 19},
+ {94, "iso-ir-153", 10},
+ {95, "ISO_8859-supp", 13},
+ {95, "csISO8859Supp", 13},
+ {95, "iso-ir-154", 10},
+ {95, "latin1-2-5", 10},
+ {96, "ISO_10367-box", 13},
+ {96, "csISO10367Box", 13},
+ {96, "iso-ir-155", 10},
+ {97, "latin-lap", 9},
+ {97, "csISO158Lap", 11},
+ {97, "iso-ir-158", 10},
+ {97, "lap", 3},
+ {98, "JIS_X0212-1990", 14},
+ {98, "csISO159JISX02121990", 20},
+ {98, "iso-ir-159", 10},
+ {98, "x0212", 5},
+ {99, "DS_2089", 7},
+ {99, "DS2089", 6},
+ {99, "ISO646-DK", 9},
+ {99, "csISO646Danish", 14},
+ {99, "dk", 2},
+ {100, "us-dk", 5},
+ {100, "csUSDK", 6},
+ {101, "dk-us", 5},
+ {101, "csDKUS", 6},
+ {102, "KSC5636", 7},
+ {102, "ISO646-KR", 9},
+ {102, "csKSC5636", 9},
+ {103, "UNICODE-1-1-UTF-7", 17},
+ {103, "csUnicode11UTF7", 15},
+ {104, "ISO-2022-CN", 11},
+ {104, "csISO2022CN", 11},
+ {105, "ISO-2022-CN-EXT", 15},
+ {105, "csISO2022CNEXT", 14},
+ {106, "UTF-8", 5},
+ {106, "csUTF8", 6},
+ {109, "ISO-8859-13", 11},
+ {109, "csISO885913", 11},
+ {110, "ISO-8859-14", 11},
+ {110, "ISO_8859-14", 11},
+ {110, "ISO_8859-14:1998", 16},
+ {110, "csISO885914", 11},
+ {110, "iso-celtic", 10},
+ {110, "iso-ir-199", 10},
+ {110, "l8", 2},
+ {110, "latin8", 6},
+ {111, "ISO-8859-15", 11},
+ {111, "ISO_8859-15", 11},
+ {111, "Latin-9", 7},
+ {111, "csISO885915", 11},
+ {112, "ISO-8859-16", 11},
+ {112, "ISO_8859-16", 11},
+ {112, "ISO_8859-16:2001", 16},
+ {112, "csISO885916", 11},
+ {112, "iso-ir-226", 10},
+ {112, "l10", 3},
+ {112, "latin10", 7},
+ {113, "GBK", 3},
+ {113, "CP936", 5},
+ {113, "MS936", 5},
+ {113, "csGBK", 5},
+ {113, "windows-936", 11},
+ {114, "GB18030", 7},
+ {114, "csGB18030", 9},
+ {115, "OSD_EBCDIC_DF04_15", 18},
+ {115, "csOSDEBCDICDF0415", 17},
+ {116, "OSD_EBCDIC_DF03_IRV", 19},
+ {116, "csOSDEBCDICDF03IRV", 18},
+ {117, "OSD_EBCDIC_DF04_1", 17},
+ {117, "csOSDEBCDICDF041", 16},
+ {118, "ISO-11548-1", 11},
+ {118, "ISO_11548-1", 11},
+ {118, "ISO_TR_11548-1", 14},
+ {118, "csISO115481", 11},
+ {119, "KZ-1048", 7},
+ {119, "RK1048", 6},
+ {119, "STRK1048-2002", 13},
+ {119, "csKZ1048", 8},
+ {1000, "ISO-10646-UCS-2", 15},
+ {1000, "csUnicode", 9},
+ {1001, "ISO-10646-UCS-4", 15},
+ {1001, "csUCS4", 6},
+ {1002, "ISO-10646-UCS-Basic", 19},
+ {1002, "csUnicodeASCII", 14},
+ {1003, "ISO-10646-Unicode-Latin1", 24},
+ {1003, "ISO-10646", 9},
+ {1003, "csUnicodeLatin1", 15},
+ {1004, "ISO-10646-J-1", 13},
+ {1004, "csUnicodeJapanese", 17},
+ {1005, "ISO-Unicode-IBM-1261", 20},
+ {1005, "csUnicodeIBM1261", 16},
+ {1006, "ISO-Unicode-IBM-1268", 20},
+ {1006, "csUnicodeIBM1268", 16},
+ {1007, "ISO-Unicode-IBM-1276", 20},
+ {1007, "csUnicodeIBM1276", 16},
+ {1008, "ISO-Unicode-IBM-1264", 20},
+ {1008, "csUnicodeIBM1264", 16},
+ {1009, "ISO-Unicode-IBM-1265", 20},
+ {1009, "csUnicodeIBM1265", 16},
+ {1010, "UNICODE-1-1", 11},
+ {1010, "csUnicode11", 11},
+ {1011, "SCSU", 4},
+ {1011, "csSCSU", 6},
+ {1012, "UTF-7", 5},
+ {1012, "csUTF7", 6},
+ {1013, "UTF-16BE", 8},
+ {1013, "csUTF16BE", 9},
+ {1014, "UTF-16LE", 8},
+ {1014, "csUTF16LE", 9},
+ {1015, "UTF-16", 6},
+ {1015, "csUTF16", 7},
+ {1016, "CESU-8", 6},
+ {1016, "csCESU-8", 8},
+ {1016, "csCESU8", 7},
+ {1017, "UTF-32", 6},
+ {1017, "csUTF32", 7},
+ {1018, "UTF-32BE", 8},
+ {1018, "csUTF32BE", 9},
+ {1019, "UTF-32LE", 8},
+ {1019, "csUTF32LE", 9},
+ {1020, "BOCU-1", 6},
+ {1020, "csBOCU-1", 8},
+ {1020, "csBOCU1", 7},
+ {1021, "UTF-7-IMAP", 10},
+ {1021, "csUTF7IMAP", 10},
+ {2000, "ISO-8859-1-Windows-3.0-Latin-1", 30},
+ {2000, "csWindows30Latin1", 17},
+ {2001, "ISO-8859-1-Windows-3.1-Latin-1", 30},
+ {2001, "csWindows31Latin1", 17},
+ {2002, "ISO-8859-2-Windows-Latin-2", 26},
+ {2002, "csWindows31Latin2", 17},
+ {2003, "ISO-8859-9-Windows-Latin-5", 26},
+ {2003, "csWindows31Latin5", 17},
+ {2004, "hp-roman8", 9},
+ {2004, "csHPRoman8", 10},
+ {2004, "r8", 2},
+ {2004, "roman8", 6},
+ {2005, "Adobe-Standard-Encoding", 23},
+ {2005, "csAdobeStandardEncoding", 23},
+ {2006, "Ventura-US", 10},
+ {2006, "csVenturaUS", 11},
+ {2007, "Ventura-International", 21},
+ {2007, "csVenturaInternational", 22},
+ {2008, "DEC-MCS", 7},
+ {2008, "csDECMCS", 8},
+ {2008, "dec", 3},
+ {2009, "IBM850", 6},
+ {2009, "850", 3},
+ {2009, "cp850", 5},
+ {2009, "csPC850Multilingual", 19},
+ {2010, "IBM852", 6},
+ {2010, "852", 3},
+ {2010, "cp852", 5},
+ {2010, "csPCp852", 8},
+ {2011, "IBM437", 6},
+ {2011, "437", 3},
+ {2011, "cp437", 5},
+ {2011, "csPC8CodePage437", 16},
+ {2012, "PC8-Danish-Norwegian", 20},
+ {2012, "csPC8DanishNorwegian", 20},
+ {2013, "IBM862", 6},
+ {2013, "862", 3},
+ {2013, "cp862", 5},
+ {2013, "csPC862LatinHebrew", 18},
+ {2014, "PC8-Turkish", 11},
+ {2014, "csPC8Turkish", 12},
+ {2015, "IBM-Symbols", 11},
+ {2015, "csIBMSymbols", 12},
+ {2016, "IBM-Thai", 8},
+ {2016, "csIBMThai", 9},
+ {2017, "HP-Legal", 8},
+ {2017, "csHPLegal", 9},
+ {2018, "HP-Pi-font", 10},
+ {2018, "csHPPiFont", 10},
+ {2019, "HP-Math8", 8},
+ {2019, "csHPMath8", 9},
+ {2020, "Adobe-Symbol-Encoding", 21},
+ {2020, "csHPPSMath", 10},
+ {2021, "HP-DeskTop", 10},
+ {2021, "csHPDesktop", 11},
+ {2022, "Ventura-Math", 12},
+ {2022, "csVenturaMath", 13},
+ {2023, "Microsoft-Publishing", 20},
+ {2023, "csMicrosoftPublishing", 21},
+ {2024, "Windows-31J", 11},
+ {2024, "csWindows31J", 12},
+ {2025, "GB2312", 6},
+ {2025, "csGB2312", 8},
+ {2026, "Big5", 4},
+ {2026, "csBig5", 6},
+ {2027, "macintosh", 9},
+ {2027, "csMacintosh", 11},
+ {2027, "mac", 3},
+ {2028, "IBM037", 6},
+ {2028, "cp037", 5},
+ {2028, "csIBM037", 8},
+ {2028, "ebcdic-cp-ca", 12},
+ {2028, "ebcdic-cp-nl", 12},
+ {2028, "ebcdic-cp-us", 12},
+ {2028, "ebcdic-cp-wt", 12},
+ {2029, "IBM038", 6},
+ {2029, "EBCDIC-INT", 10},
+ {2029, "cp038", 5},
+ {2029, "csIBM038", 8},
+ {2030, "IBM273", 6},
+ {2030, "CP273", 5},
+ {2030, "csIBM273", 8},
+ {2031, "IBM274", 6},
+ {2031, "CP274", 5},
+ {2031, "EBCDIC-BE", 9},
+ {2031, "csIBM274", 8},
+ {2032, "IBM275", 6},
+ {2032, "EBCDIC-BR", 9},
+ {2032, "cp275", 5},
+ {2032, "csIBM275", 8},
+ {2033, "IBM277", 6},
+ {2033, "EBCDIC-CP-DK", 12},
+ {2033, "EBCDIC-CP-NO", 12},
+ {2033, "csIBM277", 8},
+ {2034, "IBM278", 6},
+ {2034, "CP278", 5},
+ {2034, "csIBM278", 8},
+ {2034, "ebcdic-cp-fi", 12},
+ {2034, "ebcdic-cp-se", 12},
+ {2035, "IBM280", 6},
+ {2035, "CP280", 5},
+ {2035, "csIBM280", 8},
+ {2035, "ebcdic-cp-it", 12},
+ {2036, "IBM281", 6},
+ {2036, "EBCDIC-JP-E", 11},
+ {2036, "cp281", 5},
+ {2036, "csIBM281", 8},
+ {2037, "IBM284", 6},
+ {2037, "CP284", 5},
+ {2037, "csIBM284", 8},
+ {2037, "ebcdic-cp-es", 12},
+ {2038, "IBM285", 6},
+ {2038, "CP285", 5},
+ {2038, "csIBM285", 8},
+ {2038, "ebcdic-cp-gb", 12},
+ {2039, "IBM290", 6},
+ {2039, "EBCDIC-JP-kana", 14},
+ {2039, "cp290", 5},
+ {2039, "csIBM290", 8},
+ {2040, "IBM297", 6},
+ {2040, "cp297", 5},
+ {2040, "csIBM297", 8},
+ {2040, "ebcdic-cp-fr", 12},
+ {2041, "IBM420", 6},
+ {2041, "cp420", 5},
+ {2041, "csIBM420", 8},
+ {2041, "ebcdic-cp-ar1", 13},
+ {2042, "IBM423", 6},
+ {2042, "cp423", 5},
+ {2042, "csIBM423", 8},
+ {2042, "ebcdic-cp-gr", 12},
+ {2043, "IBM424", 6},
+ {2043, "cp424", 5},
+ {2043, "csIBM424", 8},
+ {2043, "ebcdic-cp-he", 12},
+ {2044, "IBM500", 6},
+ {2044, "CP500", 5},
+ {2044, "csIBM500", 8},
+ {2044, "ebcdic-cp-be", 12},
+ {2044, "ebcdic-cp-ch", 12},
+ {2045, "IBM851", 6},
+ {2045, "851", 3},
+ {2045, "cp851", 5},
+ {2045, "csIBM851", 8},
+ {2046, "IBM855", 6},
+ {2046, "855", 3},
+ {2046, "cp855", 5},
+ {2046, "csIBM855", 8},
+ {2047, "IBM857", 6},
+ {2047, "857", 3},
+ {2047, "cp857", 5},
+ {2047, "csIBM857", 8},
+ {2048, "IBM860", 6},
+ {2048, "860", 3},
+ {2048, "cp860", 5},
+ {2048, "csIBM860", 8},
+ {2049, "IBM861", 6},
+ {2049, "861", 3},
+ {2049, "cp-is", 5},
+ {2049, "cp861", 5},
+ {2049, "csIBM861", 8},
+ {2050, "IBM863", 6},
+ {2050, "863", 3},
+ {2050, "cp863", 5},
+ {2050, "csIBM863", 8},
+ {2051, "IBM864", 6},
+ {2051, "cp864", 5},
+ {2051, "csIBM864", 8},
+ {2052, "IBM865", 6},
+ {2052, "865", 3},
+ {2052, "cp865", 5},
+ {2052, "csIBM865", 8},
+ {2053, "IBM868", 6},
+ {2053, "CP868", 5},
+ {2053, "cp-ar", 5},
+ {2053, "csIBM868", 8},
+ {2054, "IBM869", 6},
+ {2054, "869", 3},
+ {2054, "cp-gr", 5},
+ {2054, "cp869", 5},
+ {2054, "csIBM869", 8},
+ {2055, "IBM870", 6},
+ {2055, "CP870", 5},
+ {2055, "csIBM870", 8},
+ {2055, "ebcdic-cp-roece", 15},
+ {2055, "ebcdic-cp-yu", 12},
+ {2056, "IBM871", 6},
+ {2056, "CP871", 5},
+ {2056, "csIBM871", 8},
+ {2056, "ebcdic-cp-is", 12},
+ {2057, "IBM880", 6},
+ {2057, "EBCDIC-Cyrillic", 15},
+ {2057, "cp880", 5},
+ {2057, "csIBM880", 8},
+ {2058, "IBM891", 6},
+ {2058, "cp891", 5},
+ {2058, "csIBM891", 8},
+ {2059, "IBM903", 6},
+ {2059, "cp903", 5},
+ {2059, "csIBM903", 8},
+ {2060, "IBM904", 6},
+ {2060, "904", 3},
+ {2060, "cp904", 5},
+ {2060, "csIBBM904", 9},
+ {2061, "IBM905", 6},
+ {2061, "CP905", 5},
+ {2061, "csIBM905", 8},
+ {2061, "ebcdic-cp-tr", 12},
+ {2062, "IBM918", 6},
+ {2062, "CP918", 5},
+ {2062, "csIBM918", 8},
+ {2062, "ebcdic-cp-ar2", 13},
+ {2063, "IBM1026", 7},
+ {2063, "CP1026", 6},
+ {2063, "csIBM1026", 9},
+ {2064, "EBCDIC-AT-DE", 12},
+ {2064, "csIBMEBCDICATDE", 15},
+ {2065, "EBCDIC-AT-DE-A", 14},
+ {2065, "csEBCDICATDEA", 13},
+ {2066, "EBCDIC-CA-FR", 12},
+ {2066, "csEBCDICCAFR", 12},
+ {2067, "EBCDIC-DK-NO", 12},
+ {2067, "csEBCDICDKNO", 12},
+ {2068, "EBCDIC-DK-NO-A", 14},
+ {2068, "csEBCDICDKNOA", 13},
+ {2069, "EBCDIC-FI-SE", 12},
+ {2069, "csEBCDICFISE", 12},
+ {2070, "EBCDIC-FI-SE-A", 14},
+ {2070, "csEBCDICFISEA", 13},
+ {2071, "EBCDIC-FR", 9},
+ {2071, "csEBCDICFR", 10},
+ {2072, "EBCDIC-IT", 9},
+ {2072, "csEBCDICIT", 10},
+ {2073, "EBCDIC-PT", 9},
+ {2073, "csEBCDICPT", 10},
+ {2074, "EBCDIC-ES", 9},
+ {2074, "csEBCDICES", 10},
+ {2075, "EBCDIC-ES-A", 11},
+ {2075, "csEBCDICESA", 11},
+ {2076, "EBCDIC-ES-S", 11},
+ {2076, "csEBCDICESS", 11},
+ {2077, "EBCDIC-UK", 9},
+ {2077, "csEBCDICUK", 10},
+ {2078, "EBCDIC-US", 9},
+ {2078, "csEBCDICUS", 10},
+ {2079, "UNKNOWN-8BIT", 12},
+ {2079, "csUnknown8BiT", 13},
+ {2080, "MNEMONIC", 8},
+ {2080, "csMnemonic", 10},
+ {2081, "MNEM", 4},
+ {2081, "csMnem", 6},
+ {2082, "VISCII", 6},
+ {2082, "csVISCII", 8},
+ {2083, "VIQR", 4},
+ {2083, "csVIQR", 6},
+ {2084, "KOI8-R", 6},
+ {2084, "csKOI8R", 7},
+ {2085, "HZ-GB-2312", 10},
+ {2086, "IBM866", 6},
+ {2086, "866", 3},
+ {2086, "cp866", 5},
+ {2086, "csIBM866", 8},
+ {2087, "IBM775", 6},
+ {2087, "cp775", 5},
+ {2087, "csPC775Baltic", 13},
+ {2088, "KOI8-U", 6},
+ {2088, "csKOI8U", 7},
+ {2089, "IBM00858", 8},
+ {2089, "CCSID00858", 10},
+ {2089, "CP00858", 7},
+ {2089, "PC-Multilingual-850+euro", 24},
+ {2089, "csIBM00858", 10},
+ {2090, "IBM00924", 8},
+ {2090, "CCSID00924", 10},
+ {2090, "CP00924", 7},
+ {2090, "csIBM00924", 10},
+ {2090, "ebcdic-Latin9--euro", 19},
+ {2091, "IBM01140", 8},
+ {2091, "CCSID01140", 10},
+ {2091, "CP01140", 7},
+ {2091, "csIBM01140", 10},
+ {2091, "ebcdic-us-37+euro", 17},
+ {2092, "IBM01141", 8},
+ {2092, "CCSID01141", 10},
+ {2092, "CP01141", 7},
+ {2092, "csIBM01141", 10},
+ {2092, "ebcdic-de-273+euro", 18},
+ {2093, "IBM01142", 8},
+ {2093, "CCSID01142", 10},
+ {2093, "CP01142", 7},
+ {2093, "csIBM01142", 10},
+ {2093, "ebcdic-dk-277+euro", 18},
+ {2093, "ebcdic-no-277+euro", 18},
+ {2094, "IBM01143", 8},
+ {2094, "CCSID01143", 10},
+ {2094, "CP01143", 7},
+ {2094, "csIBM01143", 10},
+ {2094, "ebcdic-fi-278+euro", 18},
+ {2094, "ebcdic-se-278+euro", 18},
+ {2095, "IBM01144", 8},
+ {2095, "CCSID01144", 10},
+ {2095, "CP01144", 7},
+ {2095, "csIBM01144", 10},
+ {2095, "ebcdic-it-280+euro", 18},
+ {2096, "IBM01145", 8},
+ {2096, "CCSID01145", 10},
+ {2096, "CP01145", 7},
+ {2096, "csIBM01145", 10},
+ {2096, "ebcdic-es-284+euro", 18},
+ {2097, "IBM01146", 8},
+ {2097, "CCSID01146", 10},
+ {2097, "CP01146", 7},
+ {2097, "csIBM01146", 10},
+ {2097, "ebcdic-gb-285+euro", 18},
+ {2098, "IBM01147", 8},
+ {2098, "CCSID01147", 10},
+ {2098, "CP01147", 7},
+ {2098, "csIBM01147", 10},
+ {2098, "ebcdic-fr-297+euro", 18},
+ {2099, "IBM01148", 8},
+ {2099, "CCSID01148", 10},
+ {2099, "CP01148", 7},
+ {2099, "csIBM01148", 10},
+ {2099, "ebcdic-international-500+euro", 29},
+ {2100, "IBM01149", 8},
+ {2100, "CCSID01149", 10},
+ {2100, "CP01149", 7},
+ {2100, "csIBM01149", 10},
+ {2100, "ebcdic-is-871+euro", 18},
+ {2101, "Big5-HKSCS", 10},
+ {2101, "csBig5HKSCS", 11},
+ {2102, "IBM1047", 7},
+ {2102, "IBM-1047", 8},
+ {2102, "csIBM1047", 9},
+ {2103, "PTCP154", 7},
+ {2103, "CP154", 5},
+ {2103, "Cyrillic-Asian", 14},
+ {2103, "PT154", 5},
+ {2103, "csPTCP154", 9},
+ {2104, "Amiga-1251", 10},
+ {2104, "Ami-1251", 8},
+ {2104, "Ami1251", 7},
+ {2104, "Amiga1251", 9},
+ {2104, "csAmiga1251", 11},
+ {2105, "KOI7-switched", 13},
+ {2105, "csKOI7switched", 14},
+ {2106, "BRF", 3},
+ {2106, "csBRF", 5},
+ {2107, "TSCII", 5},
+ {2107, "csTSCII", 7},
+ {2108, "CP51932", 7},
+ {2108, "csCP51932", 9},
+ {2109, "windows-874", 11},
+ {2109, "cswindows874", 12},
+ {2250, "windows-1250", 12},
+ {2250, "cswindows1250", 13},
+ {2251, "windows-1251", 12},
+ {2251, "cswindows1251", 13},
+ {2252, "windows-1252", 12},
+ {2252, "cswindows1252", 13},
+ {2253, "windows-1253", 12},
+ {2253, "cswindows1253", 13},
+ {2254, "windows-1254", 12},
+ {2254, "cswindows1254", 13},
+ {2255, "windows-1255", 12},
+ {2255, "cswindows1255", 13},
+ {2256, "windows-1256", 12},
+ {2256, "cswindows1256", 13},
+ {2257, "windows-1257", 12},
+ {2257, "cswindows1257", 13},
+ {2258, "windows-1258", 12},
+ {2258, "cswindows1258", 13},
+ {2259, "TIS-620", 7},
+ {2259, "ISO-8859-11", 11},
+ {2259, "csTIS620", 8},
+ {2260, "CP50220", 7},
+ {2260, "csCP50220", 9},
};
const __encoding_data* __encoding_rep_ = __text_encoding_data + 1;
>From 4a6b5a1bf2476e978e33a76865a73cff501c1092 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sat, 24 May 2025 23:44:36 -0400
Subject: [PATCH 19/54] Build string_views explicitly when passing in
__encoding_data names
---
libcxx/include/__text_encoding/text_encoding.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index a8d570ae58e25..7d21d763edc3e 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -45,7 +45,8 @@ struct text_encoding {
uint_least32_t __name_size_;
friend constexpr bool operator==(const __encoding_data& __e, const __encoding_data& __other) noexcept {
- return __e.__mib_rep_ == __other.__mib_rep_ || __comp_name(__e.__name_, __other.__name_);
+ return __e.__mib_rep_ == __other.__mib_rep_ || // each name is viewed with its own stored length
+ __comp_name(string_view(__e.__name_, __e.__name_size_), string_view(__other.__name_, __other.__name_size_));
}
friend constexpr bool operator<(const __encoding_data& __e, const __id_rep __i) noexcept {
@@ -532,7 +533,7 @@ struct text_encoding {
auto __data_ptr = __text_encoding_data + 2, __data_last = std::end(__text_encoding_data) - 1;
for (; __data_ptr != __data_last; __data_ptr++) {
- if (__comp_name(__a, __data_ptr->__name_)) {
+ if (__comp_name(__a, string_view(__data_ptr->__name_, __data_ptr->__name_size_))) {
const auto __found_id = __data_ptr->__mib_rep_;
while (__data_ptr[-1].__mib_rep_ == __found_id)
__data_ptr--;
>From fe54c99115a0f29aa19d269e9fd79f1600886f73 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sun, 25 May 2025 00:32:59 -0400
Subject: [PATCH 20/54] Assert against numbers that are in the range but not
part of the id enum
---
libcxx/include/__text_encoding/text_encoding.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 7d21d763edc3e..ddab9ee022aa2 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -545,7 +545,8 @@ struct text_encoding {
}
_LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data_by_id(id __i) {
- _LIBCPP_ASSERT(__i >= id::other && __i <= id::CP50220, "Passing invalid id to text_encoding constructor!");
+ _LIBCPP_ASSERT(__i >= id::other && __i <= id::CP50220 && __id_rep(__i) != 33 && __id_rep(__i) != 34,
+ "Passing invalid id to text_encoding constructor!");
auto __found = std::lower_bound(std::begin(__text_encoding_data), std::end(__text_encoding_data), __id_rep(__i));
return __found != std::end(__text_encoding_data)
? __found
>From 6bd85a0c8ba5ad923e14f89b9a52c3ba9fbbaed6 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sun, 25 May 2025 00:34:01 -0400
Subject: [PATCH 21/54] Remove invalid data pairs from test set
---
libcxx/test/support/test_text_encoding.h | 9 ---------
1 file changed, 9 deletions(-)
diff --git a/libcxx/test/support/test_text_encoding.h b/libcxx/test/support/test_text_encoding.h
index 460a259722f0d..f7a342bed785d 100644
--- a/libcxx/test/support/test_text_encoding.h
+++ b/libcxx/test/support/test_text_encoding.h
@@ -169,12 +169,6 @@ constexpr encoding_pair all_encoding_data[] = {
{32, "NATS-SEFI-ADD"},
{32, "csNATSSEFIADD"},
{32, "iso-ir-8-2"},
- {33, "NATS-DANO"},
- {33, "csNATSDANO"},
- {33, "iso-ir-9-1"},
- {34, "NATS-DANO-ADD"},
- {34, "csNATSDANOADD"},
- {34, "iso-ir-9-2"},
{35, "SEN_850200_B"},
{35, "FI"},
{35, "ISO646-FI"},
@@ -906,7 +900,6 @@ constexpr encoding_pair all_encoding_data[] = {
{2259, "csTIS620"},
{2260, "CP50220"},
{2260, "csCP50220"},
- {0, nullptr} // sentinel
};
constexpr encoding_pair unique_encoding_data[] = {
@@ -940,8 +933,6 @@ constexpr encoding_pair unique_encoding_data[] = {
{30, "ISO_646.irv:1983"},
{31, "NATS-SEFI"},
{32, "NATS-SEFI-ADD"},
- {33, "NATS-DANO"},
- {34, "NATS-DANO-ADD"},
{35, "SEN_850200_B"},
{36, "KS_C_5601-1987"},
{37, "ISO-2022-KR"},
>From e3da3385607653fe3f85c6c564ee34ca7903544e Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sun, 25 May 2025 01:12:46 -0400
Subject: [PATCH 22/54] Stop aliases() from going before the first alias name
---
libcxx/include/__text_encoding/text_encoding.h | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index ddab9ee022aa2..de763dcaf7eb2 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -450,10 +450,11 @@ struct text_encoding {
};
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr aliases_view aliases() const noexcept {
- auto __rep = __encoding_rep_ - 1;
+ auto __rep = __encoding_rep_;
if (__encoding_rep_->__name_[0]) {
- while (__rep > std::begin(__text_encoding_data) && (__rep--)->__mib_rep_ == __encoding_rep_->__mib_rep_)
- ;
+ while (__rep > std::begin(__text_encoding_data) && __rep[-1].__mib_rep_ == __encoding_rep_->__mib_rep_) {
+ __rep--;
+ }
} else {
__rep = nullptr;
}
>From f2aad2ffb2877fc65859886f4c3ca03197f4c5a8 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sun, 25 May 2025 01:18:29 -0400
Subject: [PATCH 23/54] Remove out-of-bounds tests since they're not allowed
and add a ranges::contains check for the name.
---
.../text_encoding/text_encoding.ctor/id.pass.cpp | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
index f01c3956e4736..f9808c545b4d5 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
@@ -22,18 +22,22 @@
// 3. Constructing an object using id::unknown must set mib() to id::unknown and the name to an empty string.
// 4. Constructing an object using id::other must set mib() to id::other and the name to an empty string.
-#include "test_text_encoding.h"
+#include <algorithm>
#include <cassert>
+#include <print>
#include <string_view>
#include <text_encoding>
#include <type_traits>
+#include "test_text_encoding.h"
+
using te_id = std::text_encoding::id;
constexpr void test_ctor(te_id i, te_id expect_id, std::string_view expect_name) {
auto te = std::text_encoding(i);
assert(te.mib() == expect_id);
assert(expect_name.compare(te.name()) == 0);
+ assert(std::ranges::contains(te.aliases(), std::string_view(te.name())));
}
int main() {
@@ -47,10 +51,4 @@ int main() {
test_ctor(te_id{pair.mib}, te_id{pair.mib}, pair.name);
}
}
-
- {
- for (int i = 2261; i < 2300; i++) { // test out of range id values
- test_ctor(te_id{i}, te_id::unknown, "");
- }
- }
}
>From 5fb37481fc56d01955bbe36b7c1c29f6003f3aee Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Sun, 25 May 2025 01:47:45 -0400
Subject: [PATCH 24/54] Make __can_dereference constexpr
---
libcxx/include/__text_encoding/text_encoding.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index de763dcaf7eb2..5e1153804509d 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -435,7 +435,7 @@ struct text_encoding {
_LIBCPP_HIDE_FROM_ABI constexpr __iterator(const __encoding_data* __enc_d) noexcept
: __data_(__enc_d), __mib_rep_(__enc_d ? __enc_d->__mib_rep_ : 0) {}
- _LIBCPP_HIDE_FROM_ABI bool __can_dereference() const { return __data_ && __data_->__mib_rep_ == __mib_rep_; }
+ _LIBCPP_HIDE_FROM_ABI constexpr bool __can_dereference() const { return __data_ && __data_->__mib_rep_ == __mib_rep_; }
// default iterator is a sentinel
const __encoding_data* __data_ = nullptr;
>From 89351bade43b4cd2bd5993b1d2e6357a8303eb31 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Mon, 26 May 2025 19:45:23 -0400
Subject: [PATCH 25/54] Don't open ranges namespace
---
libcxx/include/__text_encoding/text_encoding.h | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 5e1153804509d..1625816863534 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -1448,12 +1448,8 @@ struct hash<text_encoding> {
size_t operator()(const text_encoding& __enc) const noexcept { return std::hash<text_encoding::id>()(__enc.mib()); }
};
-namespace ranges {
-
template <>
-inline constexpr bool enable_borrowed_range<text_encoding::aliases_view> = true;
-
-} // namespace ranges
+inline constexpr bool ranges::enable_borrowed_range<text_encoding::aliases_view> = true;
_LIBCPP_END_NAMESPACE_STD
>From e8abce4d196272d07de0782ce94cb2afd854b85f Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Mon, 26 May 2025 20:26:48 -0400
Subject: [PATCH 26/54] Add nl_langinfo_l to locale base API
---
libcxx/include/__locale_dir/support/bsd_like.h | 11 ++++++++++-
libcxx/include/__locale_dir/support/fuchsia.h | 10 ++++++++++
libcxx/include/__locale_dir/support/linux.h | 10 ++++++++++
3 files changed, 30 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__locale_dir/support/bsd_like.h b/libcxx/include/__locale_dir/support/bsd_like.h
index 54eb397358d7a..9a7c4a39c34a0 100644
--- a/libcxx/include/__locale_dir/support/bsd_like.h
+++ b/libcxx/include/__locale_dir/support/bsd_like.h
@@ -23,7 +23,9 @@
# include <wchar.h>
# include <wctype.h>
#endif
-
+#if __has_include(<langinfo.h>)
+# include <langinfo.h>
+#endif
#include <xlocale.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -60,6 +62,13 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __loc
}
inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { return ::localeconv_l(__loc); }
+
+# if __has_include(<langinfo.h>)
+inline _LIBCPP_HIDE_FROM_ABI char* __nl_langinfo_l(int __category_mask, __locale_t __l) {
+ return ::nl_langinfo_l(__category_mask, __l);
+}
+# endif
+
#endif // _LIBCPP_BUILDING_LIBRARY
//
diff --git a/libcxx/include/__locale_dir/support/fuchsia.h b/libcxx/include/__locale_dir/support/fuchsia.h
index 4b9e63facb19e..e987c960987b3 100644
--- a/libcxx/include/__locale_dir/support/fuchsia.h
+++ b/libcxx/include/__locale_dir/support/fuchsia.h
@@ -16,6 +16,10 @@
#include <cstdlib>
#include <cwchar>
+#if __has_include(<langinfo.h>)
+# include <langinfo.h>
+#endif
+
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
#endif
@@ -69,6 +73,12 @@ inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) {
return std::localeconv();
}
+# if __has_include(<langinfo.h>)
+inline _LIBCPP_HIDE_FROM_ABI char* __nl_langinfo_l(int __category_mask, __locale_t __l) {
+ return ::nl_langinfo_l(__category_mask, __l);
+}
+# endif
+
//
// Other functions
//
diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h
index fa0b03c646a2a..236d704bcf1c4 100644
--- a/libcxx/include/__locale_dir/support/linux.h
+++ b/libcxx/include/__locale_dir/support/linux.h
@@ -24,6 +24,9 @@
# include <cwchar>
# include <wctype.h>
#endif
+#if __has_include(<langinfo.h>)
+# include <langinfo.h>
+#endif
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -77,6 +80,13 @@ inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) {
__locale_guard __current(__loc);
return std::localeconv();
}
+
+# if __has_include(<langinfo.h>)
+inline _LIBCPP_HIDE_FROM_ABI char* __nl_langinfo_l(int __category_mask, __locale_t __l) {
+ return ::nl_langinfo_l(__category_mask, __l);
+}
+# endif
+
#endif // _LIBCPP_BUILDING_LIBRARY
//
>From e374ebe322dfc0dfe197b5fd9485e50a5d785f2b Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Mon, 26 May 2025 20:41:11 -0400
Subject: [PATCH 27/54] Update source file to use internal nl_langinfo_l
---
libcxx/src/text_encoding.cpp | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index 7557d28ab0fd8..5ea7a3ab95d81 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -13,13 +13,8 @@
#endif
#include <__locale_dir/locale_base_api.h>
-
#include <text_encoding>
-#if __has_include(<langinfo.h>)
-# include <langinfo.h>
-#endif
-
#if _LIBCPP_STD_VER >= 26
_LIBCPP_BEGIN_NAMESPACE_STD
@@ -28,7 +23,7 @@ text_encoding text_encoding::environment() {
auto __make_locale = [](const char* __name) {
text_encoding __enc{};
if (auto __loc = __locale::__newlocale(LC_CTYPE_MASK, __name, static_cast<locale_t>(0))) {
- if (const char* __codeset = nl_langinfo_l(CODESET, __loc)) {
+ if (const char* __codeset = __locale::__nl_langinfo_l(CODESET, __loc)) {
string_view __s(__codeset);
if (__s.size() < max_name_length)
__enc = text_encoding(__s);
@@ -37,7 +32,6 @@ text_encoding text_encoding::environment() {
}
return __enc;
};
-
return __make_locale("");
}
>From 75bab0cd4788e4998eb22634a338f0471c6fb841 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Mon, 26 May 2025 20:46:20 -0400
Subject: [PATCH 28/54] Bump CXX_STANDARD to 26 for now
---
libcxx/CMakeLists.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index dffdd7a3c70a6..e82087f62274a 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -1,7 +1,7 @@
#===============================================================================
# Setup Project
#===============================================================================
-cmake_minimum_required(VERSION 3.20.0)
+cmake_minimum_required(VERSION 3.25.0)
set(LLVM_SUBPROJECT_TITLE "libc++")
set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
@@ -499,7 +499,7 @@ function(cxx_add_basic_build_flags target)
# Use C++23 for all targets.
set_target_properties(${target} PROPERTIES
- CXX_STANDARD 23
+ CXX_STANDARD 26
CXX_STANDARD_REQUIRED OFF # TODO: Make this REQUIRED once we don't need to accommodate the LLVM documentation builders using an ancient CMake
CXX_EXTENSIONS NO)
>From b4db1e7c055160b06b8112d6ba176f8821222fe9 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Mon, 26 May 2025 20:46:55 -0400
Subject: [PATCH 29/54] Remove std version guard in text_encoding.cpp
---
libcxx/src/text_encoding.cpp | 4 ----
1 file changed, 4 deletions(-)
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index 5ea7a3ab95d81..eec3b0c808244 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -15,8 +15,6 @@
#include <__locale_dir/locale_base_api.h>
#include <text_encoding>
-#if _LIBCPP_STD_VER >= 26
-
_LIBCPP_BEGIN_NAMESPACE_STD
text_encoding text_encoding::environment() {
@@ -36,5 +34,3 @@ text_encoding text_encoding::environment() {
}
_LIBCPP_END_NAMESPACE_STD
-
-#endif // _LIBCPP_STD_VER > 26
>From e37706d6e70704447a5eb392dcdde5e148a6ca50 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Mon, 26 May 2025 21:00:13 -0400
Subject: [PATCH 30/54] Update __find_encoding_data_by_id comment
---
libcxx/include/__text_encoding/text_encoding.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 1625816863534..56f689a34b340 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -551,7 +551,7 @@ struct text_encoding {
auto __found = std::lower_bound(std::begin(__text_encoding_data), std::end(__text_encoding_data), __id_rep(__i));
return __found != std::end(__text_encoding_data)
? __found
- : __text_encoding_data + 1; // only possible way to get unknown is if 33, 34 are passed
+ : __text_encoding_data + 1; // unknown, should be unreachable
}
_LIBCPP_HIDE_FROM_ABI static constexpr __encoding_data __text_encoding_data[] = {
>From 25dce4f1ca8d8102940876de0531e27ded0636fa Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Mon, 26 May 2025 21:31:45 -0400
Subject: [PATCH 31/54] Update tests
---
.../text_encoding.ctor/default.pass.cpp | 9 ++-
.../text_encoding.ctor/id.pass.cpp | 68 +++++++++++++++++--
.../text_encoding.ctor/string_view.pass.cpp | 37 ++++++----
.../text_encoding.eq/equal.id.pass.cpp | 1 -
.../text_encoding.eq/equal.pass.cpp | 1 -
.../text_encoding.members/aliases.pass.cpp | 4 +-
.../environment.pass.cpp | 3 -
.../text_encoding.members/literal.pass.cpp | 6 --
.../nodiscard.verify.cpp | 37 ++++++++++
.../text_encoding.aliases_view/begin.pass.cpp | 6 +-
.../text_encoding.aliases_view/empty.pass.cpp | 6 +-
.../text_encoding.aliases_view/front.pass.cpp | 6 +-
libcxx/test/support/test_text_encoding.h | 9 ++-
13 files changed, 140 insertions(+), 53 deletions(-)
create mode 100644 libcxx/test/std/utilities/text_encoding/text_encoding.members/nodiscard.verify.cpp
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/default.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/default.pass.cpp
index 62c30d7295491..2baa7bd298116 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/default.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/default.pass.cpp
@@ -21,7 +21,7 @@
// 2. Default constructing a text_encoding object makes it so that mib() == id::unknown, and its name is empty
#include <cassert>
-#include <cstring>
+#include <string_view>
#include <text_encoding>
#include <type_traits>
@@ -34,6 +34,11 @@ int main(int, char**) {
{
auto te = std::text_encoding();
assert(te.mib() == std::text_encoding::id::unknown);
- assert(strcmp(te.name(), "") == 0);
+ assert(std::string_view("").compare(te.name()) == 0);
+ }
+ {
+ constexpr auto te = std::text_encoding();
+ static_assert(te.mib() == std::text_encoding::id::unknown);
+ static_assert(std::string_view("").compare(te.name()) == 0);
}
}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
index f9808c545b4d5..f04e4df1a1d38 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/id.pass.cpp
@@ -33,11 +33,49 @@
using te_id = std::text_encoding::id;
-constexpr void test_ctor(te_id i, te_id expect_id, std::string_view expect_name) {
+constexpr bool test_ctor(te_id i, te_id expect_id, std::string_view expect_name) {
auto te = std::text_encoding(i);
- assert(te.mib() == expect_id);
- assert(expect_name.compare(te.name()) == 0);
- assert(std::ranges::contains(te.aliases(), std::string_view(te.name())));
+ if (te.mib() != expect_id) {
+ return false;
+ }
+ if (expect_name.compare(te.name()) != 0) {
+ return false;
+ }
+ if (!std::ranges::contains(te.aliases(), std::string_view(te.name()))) {
+ return false;
+ }
+ return true;
+}
+
+constexpr bool test_ctors_static() {
+ for (auto pair : unique_encoding_data) {
+ if (!test_ctor(te_id{pair.mib}, te_id{pair.mib}, pair.name)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+constexpr bool test_unknown() {
+ constexpr auto te = std::text_encoding(te_id::unknown);
+ if (te.mib() != te_id::unknown) {
+ return false;
+ }
+ if (std::string_view(te.name()).compare(te.name()) != 0) {
+ return false;
+ }
+ return true;
+}
+
+constexpr bool test_other() {
+ constexpr auto te = std::text_encoding(te_id::other);
+ if (te.mib() != te_id::other) {
+ return false;
+ }
+ if (std::string_view(te.name()).compare(te.name()) != 0) {
+ return false;
+ }
+ return true;
}
int main() {
@@ -48,7 +86,27 @@ int main() {
{
for (auto pair : unique_encoding_data) {
- test_ctor(te_id{pair.mib}, te_id{pair.mib}, pair.name);
+ assert(test_ctor(te_id{pair.mib}, te_id{pair.mib}, pair.name));
}
}
+
+ {
+ static_assert(test_ctors_static());
+ }
+
+ {
+ static_assert(test_unknown());
+ }
+
+ {
+ assert(test_unknown());
+ }
+
+ {
+ static_assert(test_other());
+ }
+
+ {
+ assert(test_other());
+ }
}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/string_view.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/string_view.pass.cpp
index b72adf0274cb2..09d86003a4a32 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/string_view.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.ctor/string_view.pass.cpp
@@ -16,24 +16,27 @@
// text_encoding::text_encoding(string_view) noexcept
-#include "test_macros.h"
-#include "test_text_encoding.h"
#include <cassert>
#include <cstring>
#include <string_view>
#include <text_encoding>
#include <type_traits>
-constexpr void test_ctor(std::string_view str, std::string_view expect, std::text_encoding::id expect_id) {
+#include "test_macros.h"
+#include "test_text_encoding.h"
+
+constexpr bool test_ctor(std::string_view str, std::string_view expect, std::text_encoding::id expect_id) {
auto te = std::text_encoding(str);
- assert(te.mib() == expect_id);
- assert(expect.compare(te.name()) == 0);
+ return te.mib() == expect_id && expect.compare(te.name()) == 0;
}
-void test_correct_encoding_spellings() {
+constexpr bool test_correct_encoding_spellings() {
for (auto pair : unique_encoding_data) {
- test_ctor(pair.name, pair.name, std::text_encoding::id{pair.mib});
+ if (!test_ctor(pair.name, pair.name, std::text_encoding::id{pair.mib})) {
+ return false;
+ }
}
+ return true;
}
int main() {
@@ -44,30 +47,36 @@ int main() {
// happy paths
{
- test_correct_encoding_spellings();
+ assert(test_correct_encoding_spellings());
}
{
- test_ctor("U_T_F-8", "U_T_F-8", std::text_encoding::UTF8);
+ static_assert(test_ctor("U_T_F-8", "U_T_F-8", std::text_encoding::UTF8));
+ assert(test_ctor("U_T_F-8", "U_T_F-8", std::text_encoding::UTF8));
}
{
- test_ctor("utf8", "utf8", std::text_encoding::UTF8);
+ static_assert(test_ctor("utf8", "utf8", std::text_encoding::UTF8));
+ assert(test_ctor("utf8", "utf8", std::text_encoding::UTF8));
}
{
- test_ctor("u.t.f-008", "u.t.f-008", std::text_encoding::UTF8);
+ static_assert(test_ctor("u.t.f-008", "u.t.f-008", std::text_encoding::UTF8));
+ assert(test_ctor("u.t.f-008", "u.t.f-008", std::text_encoding::UTF8));
}
{
- test_ctor("utf-80", "utf-80", std::text_encoding::other);
+ static_assert(test_ctor("utf-80", "utf-80", std::text_encoding::other));
+ assert(test_ctor("utf-80", "utf-80", std::text_encoding::other));
}
{
- test_ctor("iso885931988", "iso885931988", std::text_encoding::ISOLatin3);
+ static_assert(test_ctor("iso885931988", "iso885931988", std::text_encoding::ISOLatin3));
+ assert(test_ctor("iso885931988", "iso885931988", std::text_encoding::ISOLatin3));
}
{
- test_ctor("iso00885931988", "iso00885931988", std::text_encoding::ISOLatin3);
+ static_assert(test_ctor("iso00885931988", "iso00885931988", std::text_encoding::ISOLatin3));
+ assert(test_ctor("iso00885931988", "iso00885931988", std::text_encoding::ISOLatin3));
}
}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
index bf0c75a3ebbb5..f7b2af8299490 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.id.pass.cpp
@@ -23,7 +23,6 @@
#include <cassert>
#include <text_encoding>
-#include <type_traits>
#include "test_macros.h"
#include "test_text_encoding.h"
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
index 27cd9bdff08a5..c44bff8239796 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.eq/equal.pass.cpp
@@ -25,7 +25,6 @@
#include <cassert>
#include <text_encoding>
-#include <type_traits>
#include "test_macros.h"
#include "test_text_encoding.h"
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
index 6b363ef7555e6..77fb2779de809 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/aliases.pass.cpp
@@ -9,11 +9,10 @@
// <text_encoding>
// REQUIRES: std-at-least-c++26
-// UNSUPPORTED: no-localization
// class text_encoding
-// text_encoding text_encoding::environment();
+// aliases_view text_encoding::aliases() const noexcept;
// Concerns:
// 1. Verify that text_encoding::aliases_view satisfies ranges::forward_range, copyable, view,
@@ -22,7 +21,6 @@
#include <concepts>
#include <ranges>
#include <text_encoding>
-#include <type_traits>
#include "platform_support.h"
#include "test_macros.h"
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
index cba72e45f0666..2aaa6859ad6a9 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
@@ -14,9 +14,6 @@
// UNSUPPORTED: no-localization
// UNSUPPORTED: windows
-// libc++ is not built with C++26, and the implementation for this function is in a source file.
-// XFAIL: *
-
// class text_encoding
// text_encoding text_encoding::environment();
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
index 4aa6d793000b3..7a8cb7cad425c 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
@@ -11,8 +11,6 @@
// REQUIRES: std-at-least-c++26
-// UNSUPPORTED: no-localization
-
// class text_encoding
// text_encoding text_encoding::literal() noexcept;
@@ -22,7 +20,6 @@
#include <cassert>
#include <text_encoding>
-#include <type_traits>
#include <string_view>
#include "test_macros.h"
@@ -37,9 +34,6 @@ int main() {
assert(std::string_view(te.name()) == std::string_view(__GNUC_EXECUTION_CHARSET_NAME));
# elif defined(__clang_literal_encoding__)
assert(std::string_view(te.name()) == std::string_view(__clang_literal_encoding__));
-# elif defined(__clang__)
- assert(std::string_view(te.name()) == "UTF-8");
- assert(te.mib() == std::text_encoding::id::UTF8);
# else
assert(te.mib() = std::text_encoding::id::unknown);
# endif
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/nodiscard.verify.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/nodiscard.verify.cpp
new file mode 100644
index 0000000000000..5aa43c86e4eff
--- /dev/null
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/nodiscard.verify.cpp
@@ -0,0 +1,37 @@
+
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <text_encoding>
+
+// REQUIRES: std-at-least-c++26
+
+// class text_encoding
+
+// Concerns:
+// 1. Verify that text_encoding member functions are nodiscard
+
+#include <text_encoding>
+
+int main() {
+ auto te = std::text_encoding();
+ // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+ te.mib();
+ // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+ te.name();
+ // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+ te.aliases();
+ // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+ te.environment();
+ // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
+ te.environment_is<std::text_encoding::id::unknown>();
+
+ // Clang does not emit a nodiscard warning for consteval with [[nodiscard]]: See #141536
+ // expected-warning@+1 {{expression result unused}}
+ std::text_encoding::literal();
+}
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
index ecb98403fc821..03a9f763ad9a8 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp
@@ -10,11 +10,7 @@
// REQUIRES: std-at-least-c++26
-// UNSUPPORTED: no-localization
-
-// class text_encoding
-
-// text_encoding text_encoding::environment();
+// struct text_encoding::aliases_view
// Concerns:
// 1. begin() of an aliases_view() from a single text_encoding object are the same.
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
index 6105655351675..f645dfa7897d4 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp
@@ -10,11 +10,7 @@
// REQUIRES: std-at-least-c++26
-// UNSUPPORTED: no-localization
-
-// class text_encoding
-
-// text_encoding text_encoding::environment();
+// struct text_encoding::aliases_view
// Concerns:
// 1. An aliases_view of a text_encoding object for "other" is empty
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
index 9066e1e9f8da9..bc65891c08f09 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp
@@ -10,11 +10,7 @@
// REQUIRES: std-at-least-c++26
-// UNSUPPORTED: no-localization
-
-// class text_encoding
-
-// text_encoding text_encoding::environment();
+// struct text_encoding::aliases_view
// Concerns:
// 1. An aliases_view from a single text_encoding object returns the same front()
diff --git a/libcxx/test/support/test_text_encoding.h b/libcxx/test/support/test_text_encoding.h
index f7a342bed785d..194a4d4830330 100644
--- a/libcxx/test/support/test_text_encoding.h
+++ b/libcxx/test/support/test_text_encoding.h
@@ -10,6 +10,7 @@
#define SUPPORT_TEST_TEXT_ENCODING_H
#include "test_macros.h"
+#include <array>
#include <cstdint>
struct encoding_pair {
@@ -17,7 +18,8 @@ struct encoding_pair {
const char* name;
};
-constexpr encoding_pair all_encoding_data[] = {
+constexpr std::array<const encoding_pair, 882> all_encoding_data{ {
+
{1, ""},
{2, ""},
{3, "ANSI_X3.4-1968"},
@@ -900,9 +902,10 @@ constexpr encoding_pair all_encoding_data[] = {
{2259, "csTIS620"},
{2260, "CP50220"},
{2260, "csCP50220"},
+ }
};
-constexpr encoding_pair unique_encoding_data[] = {
+constexpr std::array<const encoding_pair, 256> unique_encoding_data{{
{3, "ANSI_X3.4-1968"},
{4, "ISO-8859-1"},
{5, "ISO-8859-2"},
@@ -1159,6 +1162,6 @@ constexpr encoding_pair unique_encoding_data[] = {
{2258, "windows-1258"},
{2259, "TIS-620"},
{2260, "CP50220"},
-};
+}};
#endif // SUPPORT_TEST_TEXT_ENCODING_H
>From bb466f453f79defdf0b85acb7b99b98f952e6334 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Tue, 27 May 2025 22:29:10 -0400
Subject: [PATCH 32/54] Annotate environment() function to
_LIBCPP_EXPORTED_FROM_ABI
---
libcxx/include/__text_encoding/text_encoding.h | 2 +-
libcxx/src/text_encoding.cpp | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 56f689a34b340..a12e225c99ce0 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -484,7 +484,7 @@ struct text_encoding {
# endif
}
- [[nodiscard]] static text_encoding environment();
+ [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI static text_encoding environment();
template <id __i>
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static bool environment_is() {
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index eec3b0c808244..4f3077e79fe9b 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -16,6 +16,7 @@
#include <text_encoding>
_LIBCPP_BEGIN_NAMESPACE_STD
+_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS
text_encoding text_encoding::environment() {
auto __make_locale = [](const char* __name) {
@@ -33,4 +34,5 @@ text_encoding text_encoding::environment() {
return __make_locale("");
}
+_LIBCPP_END_EXPLICIT_ABI_ANNOTATIONS
_LIBCPP_END_NAMESPACE_STD
>From 6899ea03c557bd3d0ed5fd7e5119851d95c45cb3 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Tue, 27 May 2025 22:29:22 -0400
Subject: [PATCH 33/54] Add text_encoding.cpp to LIBCXX_SOURCES
---
libcxx/src/CMakeLists.txt | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index 1a98812894896..ea28fbe1c9250 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -54,6 +54,7 @@ set(LIBCXX_SOURCES
support/runtime/stdexcept_default.ipp
support/runtime/stdexcept_vcruntime.ipp
system_error.cpp
+ text_encoding.cpp
typeinfo.cpp
valarray.cpp
variant.cpp
@@ -92,7 +93,6 @@ if (LIBCXX_ENABLE_LOCALIZATION)
ostream.cpp
regex.cpp
strstream.cpp
- text_encoding.cpp
)
endif()
>From 7b51ecaee0cb7e1567b2d772e602536dbc450ede Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Tue, 27 May 2025 22:32:22 -0400
Subject: [PATCH 34/54] Format tests
---
.../text_encoding/text_encoding.members/literal.pass.cpp | 2 +-
.../text_encoding/text_encoding.members/nodiscard.verify.cpp | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
index 7a8cb7cad425c..df58d13bdab6b 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/literal.pass.cpp
@@ -19,8 +19,8 @@
// 1. text_encoding::literal() returns the proper encoding depending on the compiler, else unknown.
#include <cassert>
-#include <text_encoding>
#include <string_view>
+#include <text_encoding>
#include "test_macros.h"
#include "test_text_encoding.h"
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/nodiscard.verify.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/nodiscard.verify.cpp
index 5aa43c86e4eff..e5462d030d844 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/nodiscard.verify.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/nodiscard.verify.cpp
@@ -31,7 +31,7 @@ int main() {
// expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}}
te.environment_is<std::text_encoding::id::unknown>();
- // Clang does not emit a nodiscard warning for consteval with [[nodiscard]]: See #141536
+ // Clang does not emit a nodiscard warning for consteval functions with [[nodiscard]]: See #141536
// expected-warning@+1 {{expression result unused}}
std::text_encoding::literal();
}
>From c0d72e084f4b4c5692a94650e3c87ad7627d4a05 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Tue, 27 May 2025 22:36:07 -0400
Subject: [PATCH 35/54] Undo CXX_STANDARD change
---
libcxx/CMakeLists.txt | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index e82087f62274a..dffdd7a3c70a6 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -1,7 +1,7 @@
#===============================================================================
# Setup Project
#===============================================================================
-cmake_minimum_required(VERSION 3.25.0)
+cmake_minimum_required(VERSION 3.20.0)
set(LLVM_SUBPROJECT_TITLE "libc++")
set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
@@ -499,7 +499,7 @@ function(cxx_add_basic_build_flags target)
# Use C++23 for all targets.
set_target_properties(${target} PROPERTIES
- CXX_STANDARD 26
+ CXX_STANDARD 23
CXX_STANDARD_REQUIRED OFF # TODO: Make this REQUIRED once we don't need to accommodate the LLVM documentation builders using an ancient CMake
CXX_EXTENSIONS NO)
>From 7baa1402b5d8103cf14c4e1899cdfa98f80ebdf1 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Tue, 27 May 2025 22:44:50 -0400
Subject: [PATCH 36/54] Format again
---
.../locales/locale/locale.members/encoding.pass.cpp | 3 ---
1 file changed, 3 deletions(-)
diff --git a/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
index 806a025b82e2d..b64928ccb146e 100644
--- a/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp
@@ -1,4 +1,3 @@
-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -9,8 +8,6 @@
// <text_encoding>
-// libc++ not built with C++26 yet
-// XFAIL: *
// REQUIRES: std-at-least-c++26
// REQUIRES: locale.en_US.UTF-8
// UNSUPPORTED: no-localization
>From 76302e35c873798490d1209f546cef5fb7255c6e Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 00:19:42 -0400
Subject: [PATCH 37/54] Add __encoding_data operator==(string_view) and use
std::find to search for encoding_data by name
---
.../include/__text_encoding/text_encoding.h | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index a12e225c99ce0..1b97b24f90e9b 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -17,6 +17,7 @@
#endif
#include <__algorithm/copy_n.h>
+#include <__algorithm/find.h>
#include <__algorithm/lower_bound.h>
#include <__algorithm/min.h>
#include <__assert>
@@ -52,6 +53,10 @@ struct text_encoding {
friend constexpr bool operator<(const __encoding_data& __e, const __id_rep __i) noexcept {
return __e.__mib_rep_ < __i;
}
+
+ friend constexpr bool operator==(const __encoding_data& __e, std::string_view __name) noexcept {
+ return __comp_name(__name, string_view(__e.__name_, __e.__name_size_));
+ }
};
public:
@@ -531,18 +536,10 @@ struct text_encoding {
_LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data(string_view __a) {
_LIBCPP_ASSERT(__a.size() <= max_name_length, "Passing encoding name longer than max_name_length!");
- auto __data_ptr = __text_encoding_data + 2, __data_last = std::end(__text_encoding_data) - 1;
-
- for (; __data_ptr != __data_last; __data_ptr++) {
- if (__comp_name(__a, string_view(__data_ptr->__name_, __data_ptr->__name_size_))) {
- const auto __found_id = __data_ptr->__mib_rep_;
- while (__data_ptr[-1].__mib_rep_ == __found_id)
- __data_ptr--;
- return __data_ptr;
- }
- }
+ auto __data_ptr = __text_encoding_data + 2, __data_last = std::end(__text_encoding_data);
+ auto __found_data = std::find(__data_ptr, __data_last, __a);
- return __text_encoding_data; // other
+ return __found_data != __data_last ? __found_data : __text_encoding_data; // other
}
_LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data_by_id(id __i) {
>From 1456bcb3b8a231969b0b60551c49aee2ddbf89ef Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 00:22:37 -0400
Subject: [PATCH 38/54] Remove localization guard from cppm
---
libcxx/modules/std.cppm.in | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/libcxx/modules/std.cppm.in b/libcxx/modules/std.cppm.in
index 6f396e9ccdd90..b468e0f271ad0 100644
--- a/libcxx/modules/std.cppm.in
+++ b/libcxx/modules/std.cppm.in
@@ -138,9 +138,7 @@ module;
# include <syncstream>
#endif
#include <system_error>
-#if _LIBCPP_HAS_LOCALIZATION
-# include <text_encoding>
-#endif
+#include <text_encoding>
#include <thread>
#include <tuple>
#include <type_traits>
>From 9c6c8a10218423617f057c30234d11572b68f49a Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 00:24:22 -0400
Subject: [PATCH 39/54] Move locale::encoding() to header
---
libcxx/include/__locale | 11 ++++++++++-
libcxx/src/locale.cpp | 12 ------------
2 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 7001c7af35b62..81172ec609749 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -105,7 +105,16 @@ public:
string name() const;
# if _LIBCPP_STD_VER >= 26
- text_encoding encoding() const;
+ text_encoding encoding() const {
+ std::string __name = this->name();
+ if (__name.size() == 1) {
+ if (__name[0] == 'C')
+ return std::text_encoding(text_encoding::id::ASCII);
+ if (__name[0] == '*')
+ return std::text_encoding();
+ }
+ return std::text_encoding(__name);
+ }
# endif // _LIBCPP_STD_VER >= 26
bool operator==(const locale&) const;
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index a91f3cc1fcefa..177ca4480e4b7 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -558,18 +558,6 @@ locale::locale(const locale& other, const locale& one, category c)
string locale::name() const { return __locale_->name(); }
-#if _LIBCPP_STD_VER >= 26
-text_encoding locale::encoding() const {
- std::string __name = this->name();
- if (__name.size() == 1) {
- if (__name[0] == 'C')
- return std::text_encoding(text_encoding::id::ASCII);
- if (__name[0] == '*')
- return std::text_encoding();
- }
- return std::text_encoding(__name);
-}
-#endif // _LIBCPP_STD_VER >= 26
void locale::__install_ctor(const locale& other, facet* f, long facet_id) {
if (f)
>From ea13241de2bbd2884fbbd334a378e53520b33ed9 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 00:28:51 -0400
Subject: [PATCH 40/54] Move an include in test_text_encoding.h
---
libcxx/test/support/test_text_encoding.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/libcxx/test/support/test_text_encoding.h b/libcxx/test/support/test_text_encoding.h
index 194a4d4830330..d88096abb79fd 100644
--- a/libcxx/test/support/test_text_encoding.h
+++ b/libcxx/test/support/test_text_encoding.h
@@ -9,10 +9,11 @@
#ifndef SUPPORT_TEST_TEXT_ENCODING_H
#define SUPPORT_TEST_TEXT_ENCODING_H
-#include "test_macros.h"
#include <array>
#include <cstdint>
+#include "test_macros.h"
+
struct encoding_pair {
int_least32_t mib;
const char* name;
>From 29204ee8c45671a13e67b909bd8d9959cf000a0f Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 18:38:25 -0400
Subject: [PATCH 41/54] Break __make_locale into its own function and call
that from environment()
---
libcxx/include/CMakeLists.txt | 1 +
.../include/__text_encoding/text_encoding.h | 6 +++-
.../text_encoding_get_locale.h | 20 +++++++++++++
libcxx/src/text_encoding.cpp | 29 +++++++------------
4 files changed, 37 insertions(+), 19 deletions(-)
create mode 100644 libcxx/include/__text_encoding/text_encoding_get_locale.h
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index ba61ee7c11e35..89e56d94d590d 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -752,6 +752,7 @@ set(files
__system_error/system_error.h
__system_error/throw_system_error.h
__text_encoding/text_encoding.h
+ __text_encoding/text_encoding_get_locale.h
__thread/formatter.h
__thread/id.h
__thread/jthread.h
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 1b97b24f90e9b..5418469440e94 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -25,6 +25,7 @@
#include <__iterator/iterator_traits.h>
#include <__ranges/view_interface.h>
#include <__string/char_traits.h>
+#include <__text_encoding/text_encoding_make_locale.h>
#include <__utility/unreachable.h>
#include <cstdint>
#include <string_view>
@@ -489,7 +490,10 @@ struct text_encoding {
# endif
}
- [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI static text_encoding environment();
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static text_encoding environment()
+ {
+ return text_encoding(__get_locale_encoding(""));
+ };
template <id __i>
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static bool environment_is() {
diff --git a/libcxx/include/__text_encoding/text_encoding_get_locale.h b/libcxx/include/__text_encoding/text_encoding_get_locale.h
new file mode 100644
index 0000000000000..f68bc81694f9f
--- /dev/null
+++ b/libcxx/include/__text_encoding/text_encoding_get_locale.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_REP_H
+#define _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_REP_H
+
+#include <__config>
+#include <string_view>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+string_view _LIBCPP_EXPORTED_FROM_ABI __get_locale_encoding(const char* __name);
+
+_LIBCPP_END_NAMESPACE_STD
+#endif
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index 4f3077e79fe9b..de1be6b0a9e5c 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -8,30 +8,23 @@
#include <__config>
-#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-# pragma GCC system_header
-#endif
-
#include <__locale_dir/locale_base_api.h>
-#include <text_encoding>
+#include <__text_encoding/text_encoding_make_locale.h>
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS
-text_encoding text_encoding::environment() {
- auto __make_locale = [](const char* __name) {
- text_encoding __enc{};
- if (auto __loc = __locale::__newlocale(LC_CTYPE_MASK, __name, static_cast<locale_t>(0))) {
- if (const char* __codeset = __locale::__nl_langinfo_l(CODESET, __loc)) {
- string_view __s(__codeset);
- if (__s.size() < max_name_length)
- __enc = text_encoding(__s);
- }
- __locale::__freelocale(__loc);
+string_view __get_locale_encoding(const char* __name){
+ std::string_view __encoding_str{""};
+ if (auto __loc = __locale::__newlocale(LC_CTYPE_MASK, __name, static_cast<locale_t>(0))) {
+ if (const char* __codeset = __locale::__nl_langinfo_l(CODESET, __loc)) {
+ string_view __s(__codeset);
+ if (__s.size() < 63)
+ __encoding_str = __s;
}
- return __enc;
- };
- return __make_locale("");
+ __locale::__freelocale(__loc);
+ }
+ return __encoding_str;
}
_LIBCPP_END_EXPLICIT_ABI_ANNOTATIONS
>From aa8ccb1aa7218c7dd4afa323516bb30b6bdc94dd Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 18:51:39 -0400
Subject: [PATCH 42/54] Call __get_locale_encoding in locale::encoding() to
get the correct encoding
---
libcxx/include/__locale | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 81172ec609749..2dc375cb1a778 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -113,7 +113,7 @@ public:
if (__name[0] == '*')
return std::text_encoding();
}
- return std::text_encoding(__name);
+ return std::text_encoding(std::__get_locale_encoding(__name.c_str()));
}
# endif // _LIBCPP_STD_VER >= 26
>From ed5d69772a880dfe2a22a8f8b82dc7118beb6fdd Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 18:59:06 -0400
Subject: [PATCH 43/54] Fix typos
---
libcxx/include/__text_encoding/text_encoding.h | 2 +-
libcxx/include/__text_encoding/text_encoding_get_locale.h | 4 ++--
libcxx/src/text_encoding.cpp | 2 +-
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 5418469440e94..f5ee297efd28f 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -25,7 +25,7 @@
#include <__iterator/iterator_traits.h>
#include <__ranges/view_interface.h>
#include <__string/char_traits.h>
-#include <__text_encoding/text_encoding_make_locale.h>
+#include <__text_encoding/text_encoding_get_locale.h>
#include <__utility/unreachable.h>
#include <cstdint>
#include <string_view>
diff --git a/libcxx/include/__text_encoding/text_encoding_get_locale.h b/libcxx/include/__text_encoding/text_encoding_get_locale.h
index f68bc81694f9f..1aa195fc529c3 100644
--- a/libcxx/include/__text_encoding/text_encoding_get_locale.h
+++ b/libcxx/include/__text_encoding/text_encoding_get_locale.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_REP_H
-#define _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_REP_H
+#ifndef _LIBCPP___TEXT_ENCODING_TEXT_GET_LOCALE_H
+#define _LIBCPP___TEXT_ENCODING_TEXT_GET_LOCALE_H
#include <__config>
#include <string_view>
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index de1be6b0a9e5c..365cd1ae8dc2c 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -9,7 +9,7 @@
#include <__config>
#include <__locale_dir/locale_base_api.h>
-#include <__text_encoding/text_encoding_make_locale.h>
+#include <__text_encoding/text_encoding_get_locale.h>
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS
>From 4c964574a9e6abce50a156718100577a85014e48 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 21:45:39 -0400
Subject: [PATCH 44/54] Gate environment() behind _LIBCPP_HAS_LOCALIZATION
---
libcxx/include/CMakeLists.txt | 2 +-
...ncoding_get_locale.h => get_locale_encoding.h} | 15 ++++++++++-----
libcxx/include/__text_encoding/text_encoding.h | 10 +++++++---
libcxx/include/module.modulemap.in | 1 +
libcxx/src/CMakeLists.txt | 2 +-
libcxx/src/locale.cpp | 1 -
libcxx/src/text_encoding.cpp | 2 +-
7 files changed, 21 insertions(+), 12 deletions(-)
rename libcxx/include/__text_encoding/{text_encoding_get_locale.h => get_locale_encoding.h} (71%)
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 89e56d94d590d..938f756a89f31 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -752,7 +752,7 @@ set(files
__system_error/system_error.h
__system_error/throw_system_error.h
__text_encoding/text_encoding.h
- __text_encoding/text_encoding_get_locale.h
+ __text_encoding/get_locale_encoding.h
__thread/formatter.h
__thread/id.h
__thread/jthread.h
diff --git a/libcxx/include/__text_encoding/text_encoding_get_locale.h b/libcxx/include/__text_encoding/get_locale_encoding.h
similarity index 71%
rename from libcxx/include/__text_encoding/text_encoding_get_locale.h
rename to libcxx/include/__text_encoding/get_locale_encoding.h
index 1aa195fc529c3..5a771e7a647c0 100644
--- a/libcxx/include/__text_encoding/text_encoding_get_locale.h
+++ b/libcxx/include/__text_encoding/get_locale_encoding.h
@@ -6,15 +6,20 @@
//
//===----------------------------------------------------------------------===//
-#ifndef _LIBCPP___TEXT_ENCODING_TEXT_GET_LOCALE_H
-#define _LIBCPP___TEXT_ENCODING_TEXT_GET_LOCALE_H
+#ifndef _LIBCPP___TEXT_ENCODING_GET_LOCALE_ENCODING_H
+#define _LIBCPP___TEXT_ENCODING_GET_LOCALE_ENCODING_H
#include <__config>
-#include <string_view>
-_LIBCPP_BEGIN_NAMESPACE_STD
+#if _LIBCPP_HAS_LOCALIZATION
+
+# include <string_view>
+_LIBCPP_BEGIN_NAMESPACE_STD
string_view _LIBCPP_EXPORTED_FROM_ABI __get_locale_encoding(const char* __name);
_LIBCPP_END_NAMESPACE_STD
-#endif
+
+#endif // _LIBCPP_HAS_LOCALIZATION
+
+#endif // _LIBCPP___TEXT_ENCODING_GET_LOCALE_ENCODING_H
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index f5ee297efd28f..985874e69ce13 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -25,7 +25,7 @@
#include <__iterator/iterator_traits.h>
#include <__ranges/view_interface.h>
#include <__string/char_traits.h>
-#include <__text_encoding/text_encoding_get_locale.h>
+#include <__text_encoding/get_locale_encoding.h>
#include <__utility/unreachable.h>
#include <cstdint>
#include <string_view>
@@ -490,8 +490,8 @@ struct text_encoding {
# endif
}
- [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static text_encoding environment()
- {
+# if _LIBCPP_HAS_LOCALIZATION
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static text_encoding environment() {
return text_encoding(__get_locale_encoding(""));
};
@@ -499,6 +499,10 @@ struct text_encoding {
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static bool environment_is() {
return environment() == __i;
}
+# else
+ [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static text_encoding environment() = delete; // unavailable without localization
+ template <id> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static bool environment_is() = delete; // template, so environment_is<id::X>() resolves to this deleted declaration
+# endif // _LIBCPP_HAS_LOCALIZATION
private:
_LIBCPP_HIDE_FROM_ABI static constexpr bool __comp_name(string_view __a, string_view __b) {
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index a347538620f4d..b7c294a4207a1 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -2057,6 +2057,7 @@ module std [system] {
}
module text_encoding {
+ module get_locale { header "__text_encoding/get_locale_encoding.h" }
module text_encoding { header "__text_encoding/text_encoding.h" }
header "text_encoding"
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
index ea28fbe1c9250..1a98812894896 100644
--- a/libcxx/src/CMakeLists.txt
+++ b/libcxx/src/CMakeLists.txt
@@ -54,7 +54,6 @@ set(LIBCXX_SOURCES
support/runtime/stdexcept_default.ipp
support/runtime/stdexcept_vcruntime.ipp
system_error.cpp
- text_encoding.cpp
typeinfo.cpp
valarray.cpp
variant.cpp
@@ -93,6 +92,7 @@ if (LIBCXX_ENABLE_LOCALIZATION)
ostream.cpp
regex.cpp
strstream.cpp
+ text_encoding.cpp
)
endif()
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 177ca4480e4b7..d981d137cf1ba 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -558,7 +558,6 @@ locale::locale(const locale& other, const locale& one, category c)
string locale::name() const { return __locale_->name(); }
-
void locale::__install_ctor(const locale& other, facet* f, long facet_id) {
if (f)
__locale_ = new __imp(*other.__locale_, f, facet_id);
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index 365cd1ae8dc2c..ef1bff2797ea7 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -9,7 +9,7 @@
#include <__config>
#include <__locale_dir/locale_base_api.h>
-#include <__text_encoding/text_encoding_get_locale.h>
+#include <__text_encoding/get_locale_encoding.h>
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS
>From c897efb1a4ba5e6c83facdd3177d6c1495ff3e41 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 21:47:40 -0400
Subject: [PATCH 45/54] Clang-Format
---
libcxx/include/__text_encoding/text_encoding.h | 8 +++++---
libcxx/src/text_encoding.cpp | 4 ++--
libcxx/test/support/test_text_encoding.h | 5 ++---
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 985874e69ce13..04bd3442e40c9 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -441,11 +441,13 @@ struct text_encoding {
_LIBCPP_HIDE_FROM_ABI constexpr __iterator(const __encoding_data* __enc_d) noexcept
: __data_(__enc_d), __mib_rep_(__enc_d ? __enc_d->__mib_rep_ : 0) {}
- _LIBCPP_HIDE_FROM_ABI constexpr bool __can_dereference() const { return __data_ && __data_->__mib_rep_ == __mib_rep_; }
+ _LIBCPP_HIDE_FROM_ABI constexpr bool __can_dereference() const {
+ return __data_ && __data_->__mib_rep_ == __mib_rep_;
+ }
// default iterator is a sentinel
- const __encoding_data* __data_ = nullptr;
- __id_rep __mib_rep_ = 0;
+ const __encoding_data* __data_ = nullptr;
+ __id_rep __mib_rep_ = 0;
};
constexpr __iterator begin() const { return __iterator{__view_data_}; }
diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp
index ef1bff2797ea7..d02c4c1b566a5 100644
--- a/libcxx/src/text_encoding.cpp
+++ b/libcxx/src/text_encoding.cpp
@@ -14,13 +14,13 @@
_LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS
-string_view __get_locale_encoding(const char* __name){
+string_view __get_locale_encoding(const char* __name) {
std::string_view __encoding_str{""};
if (auto __loc = __locale::__newlocale(LC_CTYPE_MASK, __name, static_cast<locale_t>(0))) {
if (const char* __codeset = __locale::__nl_langinfo_l(CODESET, __loc)) {
string_view __s(__codeset);
if (__s.size() < 63)
- __encoding_str = __s;
+ __encoding_str = __s;
}
__locale::__freelocale(__loc);
}
diff --git a/libcxx/test/support/test_text_encoding.h b/libcxx/test/support/test_text_encoding.h
index d88096abb79fd..52452992bd2df 100644
--- a/libcxx/test/support/test_text_encoding.h
+++ b/libcxx/test/support/test_text_encoding.h
@@ -19,7 +19,7 @@ struct encoding_pair {
const char* name;
};
-constexpr std::array<const encoding_pair, 882> all_encoding_data{ {
+constexpr std::array<const encoding_pair, 882> all_encoding_data{{
{1, ""},
{2, ""},
@@ -903,8 +903,7 @@ constexpr std::array<const encoding_pair, 882> all_encoding_data{ {
{2259, "csTIS620"},
{2260, "CP50220"},
{2260, "csCP50220"},
- }
-};
+}};
constexpr std::array<const encoding_pair, 256> unique_encoding_data{{
{3, "ANSI_X3.4-1968"},
>From 4428c2d8612c01078f259dd7e43d8ed25393022c Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:04:15 -0400
Subject: [PATCH 46/54] Fix style things
---
libcxx/include/CMakeLists.txt | 2 +-
libcxx/include/__text_encoding/get_locale_encoding.h | 4 ++++
libcxx/include/module.modulemap.in | 4 ++--
libcxx/modules/std/text_encoding.inc | 2 +-
4 files changed, 8 insertions(+), 4 deletions(-)
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 938f756a89f31..4e8863b3ae8dd 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -751,8 +751,8 @@ set(files
__system_error/error_condition.h
__system_error/system_error.h
__system_error/throw_system_error.h
- __text_encoding/text_encoding.h
__text_encoding/get_locale_encoding.h
+ __text_encoding/text_encoding.h
__thread/formatter.h
__thread/id.h
__thread/jthread.h
diff --git a/libcxx/include/__text_encoding/get_locale_encoding.h b/libcxx/include/__text_encoding/get_locale_encoding.h
index 5a771e7a647c0..9e2a6d7f08d61 100644
--- a/libcxx/include/__text_encoding/get_locale_encoding.h
+++ b/libcxx/include/__text_encoding/get_locale_encoding.h
@@ -11,6 +11,10 @@
#include <__config>
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
#if _LIBCPP_HAS_LOCALIZATION
# include <string_view>
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index b7c294a4207a1..7cb4164d95b77 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -2057,13 +2057,13 @@ module std [system] {
}
module text_encoding {
- module get_locale { header "__text_encoding/get_locale_encoding.h" }
+ module get_locale_encoding { header "__text_encoding/get_locale_encoding.h" }
module text_encoding { header "__text_encoding/text_encoding.h" }
header "text_encoding"
export *
}
-
+
module thread {
module formatter { header "__thread/formatter.h" }
module id { header "__thread/id.h" }
diff --git a/libcxx/modules/std/text_encoding.inc b/libcxx/modules/std/text_encoding.inc
index 23dd71965414c..c425b98daa5a0 100644
--- a/libcxx/modules/std/text_encoding.inc
+++ b/libcxx/modules/std/text_encoding.inc
@@ -10,7 +10,7 @@
export namespace std {
#if _LIBCPP_STD_VER >= 26
using ::std::hash;
- using std::text_encoding;
using ::std::ranges::enable_borrowed_range;
+ using ::std::text_encoding;
#endif
} // namespace std
>From eb68ac48854bc3621a779701d8e73f1417ddb5e2 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:04:45 -0400
Subject: [PATCH 47/54] Implement locale::encoding() without
__get_locale_encoding() due to module visibility issues
---
libcxx/include/__locale | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 2dc375cb1a778..daf67c036621e 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -105,7 +105,7 @@ public:
string name() const;
# if _LIBCPP_STD_VER >= 26
- text_encoding encoding() const {
+ _LIBCPP_HIDE_FROM_ABI text_encoding encoding() const {
std::string __name = this->name();
if (__name.size() == 1) {
if (__name[0] == 'C')
@@ -113,7 +113,17 @@ public:
if (__name[0] == '*')
return std::text_encoding();
}
- return std::text_encoding(std::__get_locale_encoding(__name.c_str()));
+
+ // Locale names have the shape "lang_TERR.codeset@modifier"; without a '.' there is no codeset to report.
+ const auto __encoding_divider = __name.find('.');
+ if (__encoding_divider == std::string::npos)
+ return std::text_encoding();
+
+ // Look for '@' only after the '.', so the length below cannot wrap; substr() clamps the count when '@' is absent.
+ const auto __modifier_at = __name.find('@', __encoding_divider);
+ const auto __codeset = std::string_view(__name).substr(__encoding_divider + 1, __modifier_at - __encoding_divider - 1);
+ // text_encoding(string_view) requires size() <= max_name_length; otherwise fall back like the no-codeset path.
+ return __codeset.size() <= std::text_encoding::max_name_length ? std::text_encoding(__codeset) : std::text_encoding();
}
# endif // _LIBCPP_STD_VER >= 26
>From 9d7e79aa2832d6f420ef3f55856083e8d48a48d5 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:05:11 -0400
Subject: [PATCH 48/54] Reorder __encoding_data members to optimize padding,
reorder data table
---
.../include/__text_encoding/text_encoding.h | 1766 ++++++++---------
1 file changed, 883 insertions(+), 883 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 04bd3442e40c9..b4f3d7d5c01db 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -42,8 +42,8 @@ struct text_encoding {
private:
using __id_rep _LIBCPP_NODEBUG = int_least32_t;
struct __encoding_data {
- __id_rep __mib_rep_;
const char* __name_;
+ __id_rep __mib_rep_;
uint_least32_t __name_size_;
friend constexpr bool operator==(const __encoding_data& __e, const __encoding_data& __other) noexcept {
@@ -562,888 +562,888 @@ struct text_encoding {
}
_LIBCPP_HIDE_FROM_ABI static constexpr __encoding_data __text_encoding_data[] = {
- {1, "", 0},
- {2, "", 0},
- {3, "ANSI_X3.4-1968", 14},
- {3, "ANSI_X3.4-1986", 14},
- {3, "IBM367", 6},
- {3, "ISO646-US", 9},
- {3, "ISO_646.irv:1991", 16},
- {3, "cp367", 5},
- {3, "csASCII", 7},
- {3, "iso-ir-6", 8},
- {3, "us", 2},
- {4, "ISO-8859-1", 10},
- {4, "ISO_8859-1:1987", 15},
- {4, "CP819", 5},
- {4, "IBM819", 6},
- {4, "ISO_8859-1", 10},
- {4, "csISOLatin1", 11},
- {4, "iso-ir-100", 10},
- {4, "l1", 2},
- {4, "latin1", 6},
- {5, "ISO-8859-2", 10},
- {5, "ISO_8859-2:1987", 15},
- {5, "ISO_8859-2", 10},
- {5, "csISOLatin2", 11},
- {5, "iso-ir-101", 10},
- {5, "l2", 2},
- {5, "latin2", 6},
- {6, "ISO-8859-3", 10},
- {6, "ISO_8859-3:1988", 15},
- {6, "ISO_8859-3", 10},
- {6, "csISOLatin3", 11},
- {6, "iso-ir-109", 10},
- {6, "l3", 2},
- {6, "latin3", 6},
- {7, "ISO-8859-4", 10},
- {7, "ISO_8859-4:1988", 15},
- {7, "ISO_8859-4", 10},
- {7, "csISOLatin4", 11},
- {7, "iso-ir-110", 10},
- {7, "l4", 2},
- {7, "latin4", 6},
- {8, "ISO-8859-5", 10},
- {8, "ISO_8859-5:1988", 15},
- {8, "ISO_8859-5", 10},
- {8, "csISOLatinCyrillic", 18},
- {8, "cyrillic", 8},
- {8, "iso-ir-144", 10},
- {9, "ISO-8859-6", 10},
- {9, "ISO_8859-6:1987", 15},
- {9, "ASMO-708", 8},
- {9, "ECMA-114", 8},
- {9, "ISO_8859-6", 10},
- {9, "arabic", 6},
- {9, "csISOLatinArabic", 16},
- {9, "iso-ir-127", 10},
- {10, "ISO-8859-7", 10},
- {10, "ISO_8859-7:1987", 15},
- {10, "ECMA-118", 8},
- {10, "ELOT_928", 8},
- {10, "ISO_8859-7", 10},
- {10, "csISOLatinGreek", 15},
- {10, "greek", 5},
- {10, "greek8", 6},
- {10, "iso-ir-126", 10},
- {11, "ISO-8859-8", 10},
- {11, "ISO_8859-8:1988", 15},
- {11, "ISO_8859-8", 10},
- {11, "csISOLatinHebrew", 16},
- {11, "hebrew", 6},
- {11, "iso-ir-138", 10},
- {12, "ISO-8859-9", 10},
- {12, "ISO_8859-9:1989", 15},
- {12, "ISO_8859-9", 10},
- {12, "csISOLatin5", 11},
- {12, "iso-ir-148", 10},
- {12, "l5", 2},
- {12, "latin5", 6},
- {13, "ISO-8859-10", 11},
- {13, "ISO_8859-10:1992", 16},
- {13, "csISOLatin6", 11},
- {13, "iso-ir-157", 10},
- {13, "l6", 2},
- {13, "latin6", 6},
- {14, "ISO_6937-2-add", 14},
- {14, "csISOTextComm", 13},
- {14, "iso-ir-142", 10},
- {15, "JIS_X0201", 9},
- {15, "X0201", 5},
- {15, "csHalfWidthKatakana", 19},
- {16, "JIS_Encoding", 12},
- {16, "csJISEncoding", 13},
- {17, "Shift_JIS", 9},
- {17, "MS_Kanji", 8},
- {17, "csShiftJIS", 10},
- {18, "EUC-JP", 6},
- {18, "Extended_UNIX_Code_Packed_Format_for_Japanese", 45},
- {18, "csEUCPkdFmtJapanese", 19},
- {19, "Extended_UNIX_Code_Fixed_Width_for_Japanese", 43},
- {19, "csEUCFixWidJapanese", 19},
- {20, "BS_4730", 7},
- {20, "ISO646-GB", 9},
- {20, "csISO4UnitedKingdom", 19},
- {20, "gb", 2},
- {20, "iso-ir-4", 8},
- {20, "uk", 2},
- {21, "SEN_850200_C", 12},
- {21, "ISO646-SE2", 10},
- {21, "csISO11SwedishForNames", 22},
- {21, "iso-ir-11", 9},
- {21, "se2", 3},
- {22, "IT", 2},
- {22, "ISO646-IT", 9},
- {22, "csISO15Italian", 14},
- {22, "iso-ir-15", 9},
- {23, "ES", 2},
- {23, "ISO646-ES", 9},
- {23, "csISO17Spanish", 14},
- {23, "iso-ir-17", 9},
- {24, "DIN_66003", 9},
- {24, "ISO646-DE", 9},
- {24, "csISO21German", 13},
- {24, "de", 2},
- {24, "iso-ir-21", 9},
- {25, "NS_4551-1", 9},
- {25, "ISO646-NO", 9},
- {25, "csISO60DanishNorwegian", 22},
- {25, "csISO60Norwegian1", 17},
- {25, "iso-ir-60", 9},
- {25, "no", 2},
- {26, "NF_Z_62-010", 11},
- {26, "ISO646-FR", 9},
- {26, "csISO69French", 13},
- {26, "fr", 2},
- {26, "iso-ir-69", 9},
- {27, "ISO-10646-UTF-1", 15},
- {27, "csISO10646UTF1", 14},
- {28, "ISO_646.basic:1983", 18},
- {28, "csISO646basic1983", 17},
- {28, "ref", 3},
- {29, "INVARIANT", 9},
- {29, "csINVARIANT", 11},
- {30, "ISO_646.irv:1983", 16},
- {30, "csISO2IntlRefVersion", 20},
- {30, "irv", 3},
- {30, "iso-ir-2", 8},
- {31, "NATS-SEFI", 9},
- {31, "csNATSSEFI", 10},
- {31, "iso-ir-8-1", 10},
- {32, "NATS-SEFI-ADD", 13},
- {32, "csNATSSEFIADD", 13},
- {32, "iso-ir-8-2", 10},
- {35, "SEN_850200_B", 12},
- {35, "FI", 2},
- {35, "ISO646-FI", 9},
- {35, "ISO646-SE", 9},
- {35, "csISO10Swedish", 14},
- {35, "iso-ir-10", 9},
- {35, "se", 2},
- {36, "KS_C_5601-1987", 14},
- {36, "KSC_5601", 8},
- {36, "KS_C_5601-1989", 14},
- {36, "csKSC56011987", 13},
- {36, "iso-ir-149", 10},
- {36, "korean", 6},
- {37, "ISO-2022-KR", 11},
- {37, "csISO2022KR", 11},
- {38, "EUC-KR", 6},
- {38, "csEUCKR", 7},
- {39, "ISO-2022-JP", 11},
- {39, "csISO2022JP", 11},
- {40, "ISO-2022-JP-2", 13},
- {40, "csISO2022JP2", 12},
- {41, "JIS_C6220-1969-jp", 17},
- {41, "JIS_C6220-1969", 14},
- {41, "csISO13JISC6220jp", 17},
- {41, "iso-ir-13", 9},
- {41, "katakana", 8},
- {41, "x0201-7", 7},
- {42, "JIS_C6220-1969-ro", 17},
- {42, "ISO646-JP", 9},
- {42, "csISO14JISC6220ro", 17},
- {42, "iso-ir-14", 9},
- {42, "jp", 2},
- {43, "PT", 2},
- {43, "ISO646-PT", 9},
- {43, "csISO16Portuguese", 17},
- {43, "iso-ir-16", 9},
- {44, "greek7-old", 10},
- {44, "csISO18Greek7Old", 16},
- {44, "iso-ir-18", 9},
- {45, "latin-greek", 11},
- {45, "csISO19LatinGreek", 17},
- {45, "iso-ir-19", 9},
- {46, "NF_Z_62-010_(1973)", 18},
- {46, "ISO646-FR1", 10},
- {46, "csISO25French", 13},
- {46, "iso-ir-25", 9},
- {47, "Latin-greek-1", 13},
- {47, "csISO27LatinGreek1", 18},
- {47, "iso-ir-27", 9},
- {48, "ISO_5427", 8},
- {48, "csISO5427Cyrillic", 17},
- {48, "iso-ir-37", 9},
- {49, "JIS_C6226-1978", 14},
- {49, "csISO42JISC62261978", 19},
- {49, "iso-ir-42", 9},
- {50, "BS_viewdata", 11},
- {50, "csISO47BSViewdata", 17},
- {50, "iso-ir-47", 9},
- {51, "INIS", 4},
- {51, "csISO49INIS", 11},
- {51, "iso-ir-49", 9},
- {52, "INIS-8", 6},
- {52, "csISO50INIS8", 12},
- {52, "iso-ir-50", 9},
- {53, "INIS-cyrillic", 13},
- {53, "csISO51INISCyrillic", 19},
- {53, "iso-ir-51", 9},
- {54, "ISO_5427:1981", 13},
- {54, "ISO5427Cyrillic1981", 19},
- {54, "csISO54271981", 13},
- {54, "iso-ir-54", 9},
- {55, "ISO_5428:1980", 13},
- {55, "csISO5428Greek", 14},
- {55, "iso-ir-55", 9},
- {56, "GB_1988-80", 10},
- {56, "ISO646-CN", 9},
- {56, "cn", 2},
- {56, "csISO57GB1988", 13},
- {56, "iso-ir-57", 9},
- {57, "GB_2312-80", 10},
- {57, "chinese", 7},
- {57, "csISO58GB231280", 15},
- {57, "iso-ir-58", 9},
- {58, "NS_4551-2", 9},
- {58, "ISO646-NO2", 10},
- {58, "csISO61Norwegian2", 17},
- {58, "iso-ir-61", 9},
- {58, "no2", 3},
- {59, "videotex-suppl", 14},
- {59, "csISO70VideotexSupp1", 20},
- {59, "iso-ir-70", 9},
- {60, "PT2", 3},
- {60, "ISO646-PT2", 10},
- {60, "csISO84Portuguese2", 18},
- {60, "iso-ir-84", 9},
- {61, "ES2", 3},
- {61, "ISO646-ES2", 10},
- {61, "csISO85Spanish2", 15},
- {61, "iso-ir-85", 9},
- {62, "MSZ_7795.3", 10},
- {62, "ISO646-HU", 9},
- {62, "csISO86Hungarian", 16},
- {62, "hu", 2},
- {62, "iso-ir-86", 9},
- {63, "JIS_C6226-1983", 14},
- {63, "JIS_X0208-1983", 14},
- {63, "csISO87JISX0208", 15},
- {63, "iso-ir-87", 9},
- {63, "x0208", 5},
- {64, "greek7", 6},
- {64, "csISO88Greek7", 13},
- {64, "iso-ir-88", 9},
- {65, "ASMO_449", 8},
- {65, "ISO_9036", 8},
- {65, "arabic7", 7},
- {65, "csISO89ASMO449", 14},
- {65, "iso-ir-89", 9},
- {66, "iso-ir-90", 9},
- {66, "csISO90", 7},
- {67, "JIS_C6229-1984-a", 16},
- {67, "csISO91JISC62291984a", 20},
- {67, "iso-ir-91", 9},
- {67, "jp-ocr-a", 8},
- {68, "JIS_C6229-1984-b", 16},
- {68, "ISO646-JP-OCR-B", 15},
- {68, "csISO92JISC62991984b", 20},
- {68, "iso-ir-92", 9},
- {68, "jp-ocr-b", 8},
- {69, "JIS_C6229-1984-b-add", 20},
- {69, "csISO93JIS62291984badd", 22},
- {69, "iso-ir-93", 9},
- {69, "jp-ocr-b-add", 12},
- {70, "JIS_C6229-1984-hand", 19},
- {70, "csISO94JIS62291984hand", 22},
- {70, "iso-ir-94", 9},
- {70, "jp-ocr-hand", 11},
- {71, "JIS_C6229-1984-hand-add", 23},
- {71, "csISO95JIS62291984handadd", 25},
- {71, "iso-ir-95", 9},
- {71, "jp-ocr-hand-add", 15},
- {72, "JIS_C6229-1984-kana", 19},
- {72, "csISO96JISC62291984kana", 23},
- {72, "iso-ir-96", 9},
- {73, "ISO_2033-1983", 13},
- {73, "csISO2033", 9},
- {73, "e13b", 4},
- {73, "iso-ir-98", 9},
- {74, "ANSI_X3.110-1983", 16},
- {74, "CSA_T500-1983", 13},
- {74, "NAPLPS", 6},
- {74, "csISO99NAPLPS", 13},
- {74, "iso-ir-99", 9},
- {75, "T.61-7bit", 9},
- {75, "csISO102T617bit", 15},
- {75, "iso-ir-102", 10},
- {76, "T.61-8bit", 9},
- {76, "T.61", 4},
- {76, "csISO103T618bit", 15},
- {76, "iso-ir-103", 10},
- {77, "ECMA-cyrillic", 13},
- {77, "KOI8-E", 6},
- {77, "csISO111ECMACyrillic", 20},
- {77, "iso-ir-111", 10},
- {78, "CSA_Z243.4-1985-1", 17},
- {78, "ISO646-CA", 9},
- {78, "ca", 2},
- {78, "csISO121Canadian1", 17},
- {78, "csa7-1", 6},
- {78, "csa71", 5},
- {78, "iso-ir-121", 10},
- {79, "CSA_Z243.4-1985-2", 17},
- {79, "ISO646-CA2", 10},
- {79, "csISO122Canadian2", 17},
- {79, "csa7-2", 6},
- {79, "csa72", 5},
- {79, "iso-ir-122", 10},
- {80, "CSA_Z243.4-1985-gr", 18},
- {80, "csISO123CSAZ24341985gr", 22},
- {80, "iso-ir-123", 10},
- {81, "ISO-8859-6-E", 12},
- {81, "ISO_8859-6-E", 12},
- {81, "csISO88596E", 11},
- {82, "ISO-8859-6-I", 12},
- {82, "ISO_8859-6-I", 12},
- {82, "csISO88596I", 11},
- {83, "T.101-G2", 8},
- {83, "csISO128T101G2", 14},
- {83, "iso-ir-128", 10},
- {84, "ISO-8859-8-E", 12},
- {84, "ISO_8859-8-E", 12},
- {84, "csISO88598E", 11},
- {85, "ISO-8859-8-I", 12},
- {85, "ISO_8859-8-I", 12},
- {85, "csISO88598I", 11},
- {86, "CSN_369103", 10},
- {86, "csISO139CSN369103", 17},
- {86, "iso-ir-139", 10},
- {87, "JUS_I.B1.002", 12},
- {87, "ISO646-YU", 9},
- {87, "csISO141JUSIB1002", 17},
- {87, "iso-ir-141", 10},
- {87, "js", 2},
- {87, "yu", 2},
- {88, "IEC_P27-1", 9},
- {88, "csISO143IECP271", 15},
- {88, "iso-ir-143", 10},
- {89, "JUS_I.B1.003-serb", 17},
- {89, "csISO146Serbian", 15},
- {89, "iso-ir-146", 10},
- {89, "serbian", 7},
- {90, "JUS_I.B1.003-mac", 16},
- {90, "csISO147Macedonian", 18},
- {90, "iso-ir-147", 10},
- {90, "macedonian", 10},
- {91, "greek-ccitt", 11},
- {91, "csISO150", 8},
- {91, "csISO150GreekCCITT", 18},
- {91, "iso-ir-150", 10},
- {92, "NC_NC00-10:81", 13},
- {92, "ISO646-CU", 9},
- {92, "csISO151Cuba", 12},
- {92, "cuba", 4},
- {92, "iso-ir-151", 10},
- {93, "ISO_6937-2-25", 13},
- {93, "csISO6937Add", 12},
- {93, "iso-ir-152", 10},
- {94, "GOST_19768-74", 13},
- {94, "ST_SEV_358-88", 13},
- {94, "csISO153GOST1976874", 19},
- {94, "iso-ir-153", 10},
- {95, "ISO_8859-supp", 13},
- {95, "csISO8859Supp", 13},
- {95, "iso-ir-154", 10},
- {95, "latin1-2-5", 10},
- {96, "ISO_10367-box", 13},
- {96, "csISO10367Box", 13},
- {96, "iso-ir-155", 10},
- {97, "latin-lap", 9},
- {97, "csISO158Lap", 11},
- {97, "iso-ir-158", 10},
- {97, "lap", 3},
- {98, "JIS_X0212-1990", 14},
- {98, "csISO159JISX02121990", 20},
- {98, "iso-ir-159", 10},
- {98, "x0212", 5},
- {99, "DS_2089", 7},
- {99, "DS2089", 6},
- {99, "ISO646-DK", 9},
- {99, "csISO646Danish", 14},
- {99, "dk", 2},
- {100, "us-dk", 5},
- {100, "csUSDK", 6},
- {101, "dk-us", 5},
- {101, "csDKUS", 6},
- {102, "KSC5636", 7},
- {102, "ISO646-KR", 9},
- {102, "csKSC5636", 9},
- {103, "UNICODE-1-1-UTF-7", 17},
- {103, "csUnicode11UTF7", 15},
- {104, "ISO-2022-CN", 11},
- {104, "csISO2022CN", 11},
- {105, "ISO-2022-CN-EXT", 15},
- {105, "csISO2022CNEXT", 14},
- {106, "UTF-8", 5},
- {106, "csUTF8", 6},
- {109, "ISO-8859-13", 11},
- {109, "csISO885913", 11},
- {110, "ISO-8859-14", 11},
- {110, "ISO_8859-14", 11},
- {110, "ISO_8859-14:1998", 16},
- {110, "csISO885914", 11},
- {110, "iso-celtic", 10},
- {110, "iso-ir-199", 10},
- {110, "l8", 2},
- {110, "latin8", 6},
- {111, "ISO-8859-15", 11},
- {111, "ISO_8859-15", 11},
- {111, "Latin-9", 7},
- {111, "csISO885915", 11},
- {112, "ISO-8859-16", 11},
- {112, "ISO_8859-16", 11},
- {112, "ISO_8859-16:2001", 16},
- {112, "csISO885916", 11},
- {112, "iso-ir-226", 10},
- {112, "l10", 3},
- {112, "latin10", 7},
- {113, "GBK", 3},
- {113, "CP936", 5},
- {113, "MS936", 5},
- {113, "csGBK", 5},
- {113, "windows-936", 11},
- {114, "GB18030", 7},
- {114, "csGB18030", 9},
- {115, "OSD_EBCDIC_DF04_15", 18},
- {115, "csOSDEBCDICDF0415", 17},
- {116, "OSD_EBCDIC_DF03_IRV", 19},
- {116, "csOSDEBCDICDF03IRV", 18},
- {117, "OSD_EBCDIC_DF04_1", 17},
- {117, "csOSDEBCDICDF041", 16},
- {118, "ISO-11548-1", 11},
- {118, "ISO_11548-1", 11},
- {118, "ISO_TR_11548-1", 14},
- {118, "csISO115481", 11},
- {119, "KZ-1048", 7},
- {119, "RK1048", 6},
- {119, "STRK1048-2002", 13},
- {119, "csKZ1048", 8},
- {1000, "ISO-10646-UCS-2", 15},
- {1000, "csUnicode", 9},
- {1001, "ISO-10646-UCS-4", 15},
- {1001, "csUCS4", 6},
- {1002, "ISO-10646-UCS-Basic", 19},
- {1002, "csUnicodeASCII", 14},
- {1003, "ISO-10646-Unicode-Latin1", 24},
- {1003, "ISO-10646", 9},
- {1003, "csUnicodeLatin1", 15},
- {1004, "ISO-10646-J-1", 13},
- {1004, "csUnicodeJapanese", 17},
- {1005, "ISO-Unicode-IBM-1261", 20},
- {1005, "csUnicodeIBM1261", 16},
- {1006, "ISO-Unicode-IBM-1268", 20},
- {1006, "csUnicodeIBM1268", 16},
- {1007, "ISO-Unicode-IBM-1276", 20},
- {1007, "csUnicodeIBM1276", 16},
- {1008, "ISO-Unicode-IBM-1264", 20},
- {1008, "csUnicodeIBM1264", 16},
- {1009, "ISO-Unicode-IBM-1265", 20},
- {1009, "csUnicodeIBM1265", 16},
- {1010, "UNICODE-1-1", 11},
- {1010, "csUnicode11", 11},
- {1011, "SCSU", 4},
- {1011, "csSCSU", 6},
- {1012, "UTF-7", 5},
- {1012, "csUTF7", 6},
- {1013, "UTF-16BE", 8},
- {1013, "csUTF16BE", 9},
- {1014, "UTF-16LE", 8},
- {1014, "csUTF16LE", 9},
- {1015, "UTF-16", 6},
- {1015, "csUTF16", 7},
- {1016, "CESU-8", 6},
- {1016, "csCESU-8", 8},
- {1016, "csCESU8", 7},
- {1017, "UTF-32", 6},
- {1017, "csUTF32", 7},
- {1018, "UTF-32BE", 8},
- {1018, "csUTF32BE", 9},
- {1019, "UTF-32LE", 8},
- {1019, "csUTF32LE", 9},
- {1020, "BOCU-1", 6},
- {1020, "csBOCU-1", 8},
- {1020, "csBOCU1", 7},
- {1021, "UTF-7-IMAP", 10},
- {1021, "csUTF7IMAP", 10},
- {2000, "ISO-8859-1-Windows-3.0-Latin-1", 30},
- {2000, "csWindows30Latin1", 17},
- {2001, "ISO-8859-1-Windows-3.1-Latin-1", 30},
- {2001, "csWindows31Latin1", 17},
- {2002, "ISO-8859-2-Windows-Latin-2", 26},
- {2002, "csWindows31Latin2", 17},
- {2003, "ISO-8859-9-Windows-Latin-5", 26},
- {2003, "csWindows31Latin5", 17},
- {2004, "hp-roman8", 9},
- {2004, "csHPRoman8", 10},
- {2004, "r8", 2},
- {2004, "roman8", 6},
- {2005, "Adobe-Standard-Encoding", 23},
- {2005, "csAdobeStandardEncoding", 23},
- {2006, "Ventura-US", 10},
- {2006, "csVenturaUS", 11},
- {2007, "Ventura-International", 21},
- {2007, "csVenturaInternational", 22},
- {2008, "DEC-MCS", 7},
- {2008, "csDECMCS", 8},
- {2008, "dec", 3},
- {2009, "IBM850", 6},
- {2009, "850", 3},
- {2009, "cp850", 5},
- {2009, "csPC850Multilingual", 19},
- {2010, "IBM852", 6},
- {2010, "852", 3},
- {2010, "cp852", 5},
- {2010, "csPCp852", 8},
- {2011, "IBM437", 6},
- {2011, "437", 3},
- {2011, "cp437", 5},
- {2011, "csPC8CodePage437", 16},
- {2012, "PC8-Danish-Norwegian", 20},
- {2012, "csPC8DanishNorwegian", 20},
- {2013, "IBM862", 6},
- {2013, "862", 3},
- {2013, "cp862", 5},
- {2013, "csPC862LatinHebrew", 18},
- {2014, "PC8-Turkish", 11},
- {2014, "csPC8Turkish", 12},
- {2015, "IBM-Symbols", 11},
- {2015, "csIBMSymbols", 12},
- {2016, "IBM-Thai", 8},
- {2016, "csIBMThai", 9},
- {2017, "HP-Legal", 8},
- {2017, "csHPLegal", 9},
- {2018, "HP-Pi-font", 10},
- {2018, "csHPPiFont", 10},
- {2019, "HP-Math8", 8},
- {2019, "csHPMath8", 9},
- {2020, "Adobe-Symbol-Encoding", 21},
- {2020, "csHPPSMath", 10},
- {2021, "HP-DeskTop", 10},
- {2021, "csHPDesktop", 11},
- {2022, "Ventura-Math", 12},
- {2022, "csVenturaMath", 13},
- {2023, "Microsoft-Publishing", 20},
- {2023, "csMicrosoftPublishing", 21},
- {2024, "Windows-31J", 11},
- {2024, "csWindows31J", 12},
- {2025, "GB2312", 6},
- {2025, "csGB2312", 8},
- {2026, "Big5", 4},
- {2026, "csBig5", 6},
- {2027, "macintosh", 9},
- {2027, "csMacintosh", 11},
- {2027, "mac", 3},
- {2028, "IBM037", 6},
- {2028, "cp037", 5},
- {2028, "csIBM037", 8},
- {2028, "ebcdic-cp-ca", 12},
- {2028, "ebcdic-cp-nl", 12},
- {2028, "ebcdic-cp-us", 12},
- {2028, "ebcdic-cp-wt", 12},
- {2029, "IBM038", 6},
- {2029, "EBCDIC-INT", 10},
- {2029, "cp038", 5},
- {2029, "csIBM038", 8},
- {2030, "IBM273", 6},
- {2030, "CP273", 5},
- {2030, "csIBM273", 8},
- {2031, "IBM274", 6},
- {2031, "CP274", 5},
- {2031, "EBCDIC-BE", 9},
- {2031, "csIBM274", 8},
- {2032, "IBM275", 6},
- {2032, "EBCDIC-BR", 9},
- {2032, "cp275", 5},
- {2032, "csIBM275", 8},
- {2033, "IBM277", 6},
- {2033, "EBCDIC-CP-DK", 12},
- {2033, "EBCDIC-CP-NO", 12},
- {2033, "csIBM277", 8},
- {2034, "IBM278", 6},
- {2034, "CP278", 5},
- {2034, "csIBM278", 8},
- {2034, "ebcdic-cp-fi", 12},
- {2034, "ebcdic-cp-se", 12},
- {2035, "IBM280", 6},
- {2035, "CP280", 5},
- {2035, "csIBM280", 8},
- {2035, "ebcdic-cp-it", 12},
- {2036, "IBM281", 6},
- {2036, "EBCDIC-JP-E", 11},
- {2036, "cp281", 5},
- {2036, "csIBM281", 8},
- {2037, "IBM284", 6},
- {2037, "CP284", 5},
- {2037, "csIBM284", 8},
- {2037, "ebcdic-cp-es", 12},
- {2038, "IBM285", 6},
- {2038, "CP285", 5},
- {2038, "csIBM285", 8},
- {2038, "ebcdic-cp-gb", 12},
- {2039, "IBM290", 6},
- {2039, "EBCDIC-JP-kana", 14},
- {2039, "cp290", 5},
- {2039, "csIBM290", 8},
- {2040, "IBM297", 6},
- {2040, "cp297", 5},
- {2040, "csIBM297", 8},
- {2040, "ebcdic-cp-fr", 12},
- {2041, "IBM420", 6},
- {2041, "cp420", 5},
- {2041, "csIBM420", 8},
- {2041, "ebcdic-cp-ar1", 13},
- {2042, "IBM423", 6},
- {2042, "cp423", 5},
- {2042, "csIBM423", 8},
- {2042, "ebcdic-cp-gr", 12},
- {2043, "IBM424", 6},
- {2043, "cp424", 5},
- {2043, "csIBM424", 8},
- {2043, "ebcdic-cp-he", 12},
- {2044, "IBM500", 6},
- {2044, "CP500", 5},
- {2044, "csIBM500", 8},
- {2044, "ebcdic-cp-be", 12},
- {2044, "ebcdic-cp-ch", 12},
- {2045, "IBM851", 6},
- {2045, "851", 3},
- {2045, "cp851", 5},
- {2045, "csIBM851", 8},
- {2046, "IBM855", 6},
- {2046, "855", 3},
- {2046, "cp855", 5},
- {2046, "csIBM855", 8},
- {2047, "IBM857", 6},
- {2047, "857", 3},
- {2047, "cp857", 5},
- {2047, "csIBM857", 8},
- {2048, "IBM860", 6},
- {2048, "860", 3},
- {2048, "cp860", 5},
- {2048, "csIBM860", 8},
- {2049, "IBM861", 6},
- {2049, "861", 3},
- {2049, "cp-is", 5},
- {2049, "cp861", 5},
- {2049, "csIBM861", 8},
- {2050, "IBM863", 6},
- {2050, "863", 3},
- {2050, "cp863", 5},
- {2050, "csIBM863", 8},
- {2051, "IBM864", 6},
- {2051, "cp864", 5},
- {2051, "csIBM864", 8},
- {2052, "IBM865", 6},
- {2052, "865", 3},
- {2052, "cp865", 5},
- {2052, "csIBM865", 8},
- {2053, "IBM868", 6},
- {2053, "CP868", 5},
- {2053, "cp-ar", 5},
- {2053, "csIBM868", 8},
- {2054, "IBM869", 6},
- {2054, "869", 3},
- {2054, "cp-gr", 5},
- {2054, "cp869", 5},
- {2054, "csIBM869", 8},
- {2055, "IBM870", 6},
- {2055, "CP870", 5},
- {2055, "csIBM870", 8},
- {2055, "ebcdic-cp-roece", 15},
- {2055, "ebcdic-cp-yu", 12},
- {2056, "IBM871", 6},
- {2056, "CP871", 5},
- {2056, "csIBM871", 8},
- {2056, "ebcdic-cp-is", 12},
- {2057, "IBM880", 6},
- {2057, "EBCDIC-Cyrillic", 15},
- {2057, "cp880", 5},
- {2057, "csIBM880", 8},
- {2058, "IBM891", 6},
- {2058, "cp891", 5},
- {2058, "csIBM891", 8},
- {2059, "IBM903", 6},
- {2059, "cp903", 5},
- {2059, "csIBM903", 8},
- {2060, "IBM904", 6},
- {2060, "904", 3},
- {2060, "cp904", 5},
- {2060, "csIBBM904", 9},
- {2061, "IBM905", 6},
- {2061, "CP905", 5},
- {2061, "csIBM905", 8},
- {2061, "ebcdic-cp-tr", 12},
- {2062, "IBM918", 6},
- {2062, "CP918", 5},
- {2062, "csIBM918", 8},
- {2062, "ebcdic-cp-ar2", 13},
- {2063, "IBM1026", 7},
- {2063, "CP1026", 6},
- {2063, "csIBM1026", 9},
- {2064, "EBCDIC-AT-DE", 12},
- {2064, "csIBMEBCDICATDE", 15},
- {2065, "EBCDIC-AT-DE-A", 14},
- {2065, "csEBCDICATDEA", 13},
- {2066, "EBCDIC-CA-FR", 12},
- {2066, "csEBCDICCAFR", 12},
- {2067, "EBCDIC-DK-NO", 12},
- {2067, "csEBCDICDKNO", 12},
- {2068, "EBCDIC-DK-NO-A", 14},
- {2068, "csEBCDICDKNOA", 13},
- {2069, "EBCDIC-FI-SE", 12},
- {2069, "csEBCDICFISE", 12},
- {2070, "EBCDIC-FI-SE-A", 14},
- {2070, "csEBCDICFISEA", 13},
- {2071, "EBCDIC-FR", 9},
- {2071, "csEBCDICFR", 10},
- {2072, "EBCDIC-IT", 9},
- {2072, "csEBCDICIT", 10},
- {2073, "EBCDIC-PT", 9},
- {2073, "csEBCDICPT", 10},
- {2074, "EBCDIC-ES", 9},
- {2074, "csEBCDICES", 10},
- {2075, "EBCDIC-ES-A", 11},
- {2075, "csEBCDICESA", 11},
- {2076, "EBCDIC-ES-S", 11},
- {2076, "csEBCDICESS", 11},
- {2077, "EBCDIC-UK", 9},
- {2077, "csEBCDICUK", 10},
- {2078, "EBCDIC-US", 9},
- {2078, "csEBCDICUS", 10},
- {2079, "UNKNOWN-8BIT", 12},
- {2079, "csUnknown8BiT", 13},
- {2080, "MNEMONIC", 8},
- {2080, "csMnemonic", 10},
- {2081, "MNEM", 4},
- {2081, "csMnem", 6},
- {2082, "VISCII", 6},
- {2082, "csVISCII", 8},
- {2083, "VIQR", 4},
- {2083, "csVIQR", 6},
- {2084, "KOI8-R", 6},
- {2084, "csKOI8R", 7},
- {2085, "HZ-GB-2312", 10},
- {2086, "IBM866", 6},
- {2086, "866", 3},
- {2086, "cp866", 5},
- {2086, "csIBM866", 8},
- {2087, "IBM775", 6},
- {2087, "cp775", 5},
- {2087, "csPC775Baltic", 13},
- {2088, "KOI8-U", 6},
- {2088, "csKOI8U", 7},
- {2089, "IBM00858", 8},
- {2089, "CCSID00858", 10},
- {2089, "CP00858", 7},
- {2089, "PC-Multilingual-850+euro", 24},
- {2089, "csIBM00858", 10},
- {2090, "IBM00924", 8},
- {2090, "CCSID00924", 10},
- {2090, "CP00924", 7},
- {2090, "csIBM00924", 10},
- {2090, "ebcdic-Latin9--euro", 19},
- {2091, "IBM01140", 8},
- {2091, "CCSID01140", 10},
- {2091, "CP01140", 7},
- {2091, "csIBM01140", 10},
- {2091, "ebcdic-us-37+euro", 17},
- {2092, "IBM01141", 8},
- {2092, "CCSID01141", 10},
- {2092, "CP01141", 7},
- {2092, "csIBM01141", 10},
- {2092, "ebcdic-de-273+euro", 18},
- {2093, "IBM01142", 8},
- {2093, "CCSID01142", 10},
- {2093, "CP01142", 7},
- {2093, "csIBM01142", 10},
- {2093, "ebcdic-dk-277+euro", 18},
- {2093, "ebcdic-no-277+euro", 18},
- {2094, "IBM01143", 8},
- {2094, "CCSID01143", 10},
- {2094, "CP01143", 7},
- {2094, "csIBM01143", 10},
- {2094, "ebcdic-fi-278+euro", 18},
- {2094, "ebcdic-se-278+euro", 18},
- {2095, "IBM01144", 8},
- {2095, "CCSID01144", 10},
- {2095, "CP01144", 7},
- {2095, "csIBM01144", 10},
- {2095, "ebcdic-it-280+euro", 18},
- {2096, "IBM01145", 8},
- {2096, "CCSID01145", 10},
- {2096, "CP01145", 7},
- {2096, "csIBM01145", 10},
- {2096, "ebcdic-es-284+euro", 18},
- {2097, "IBM01146", 8},
- {2097, "CCSID01146", 10},
- {2097, "CP01146", 7},
- {2097, "csIBM01146", 10},
- {2097, "ebcdic-gb-285+euro", 18},
- {2098, "IBM01147", 8},
- {2098, "CCSID01147", 10},
- {2098, "CP01147", 7},
- {2098, "csIBM01147", 10},
- {2098, "ebcdic-fr-297+euro", 18},
- {2099, "IBM01148", 8},
- {2099, "CCSID01148", 10},
- {2099, "CP01148", 7},
- {2099, "csIBM01148", 10},
- {2099, "ebcdic-international-500+euro", 29},
- {2100, "IBM01149", 8},
- {2100, "CCSID01149", 10},
- {2100, "CP01149", 7},
- {2100, "csIBM01149", 10},
- {2100, "ebcdic-is-871+euro", 18},
- {2101, "Big5-HKSCS", 10},
- {2101, "csBig5HKSCS", 11},
- {2102, "IBM1047", 7},
- {2102, "IBM-1047", 8},
- {2102, "csIBM1047", 9},
- {2103, "PTCP154", 7},
- {2103, "CP154", 5},
- {2103, "Cyrillic-Asian", 14},
- {2103, "PT154", 5},
- {2103, "csPTCP154", 9},
- {2104, "Amiga-1251", 10},
- {2104, "Ami-1251", 8},
- {2104, "Ami1251", 7},
- {2104, "Amiga1251", 9},
- {2104, "csAmiga1251", 11},
- {2105, "KOI7-switched", 13},
- {2105, "csKOI7switched", 14},
- {2106, "BRF", 3},
- {2106, "csBRF", 5},
- {2107, "TSCII", 5},
- {2107, "csTSCII", 7},
- {2108, "CP51932", 7},
- {2108, "csCP51932", 9},
- {2109, "windows-874", 11},
- {2109, "cswindows874", 12},
- {2250, "windows-1250", 12},
- {2250, "cswindows1250", 13},
- {2251, "windows-1251", 12},
- {2251, "cswindows1251", 13},
- {2252, "windows-1252", 12},
- {2252, "cswindows1252", 13},
- {2253, "windows-1253", 12},
- {2253, "cswindows1253", 13},
- {2254, "windows-1254", 12},
- {2254, "cswindows1254", 13},
- {2255, "windows-1255", 12},
- {2255, "cswindows1255", 13},
- {2256, "windows-1256", 12},
- {2256, "cswindows1256", 13},
- {2257, "windows-1257", 12},
- {2257, "cswindows1257", 13},
- {2258, "windows-1258", 12},
- {2258, "cswindows1258", 13},
- {2259, "TIS-620", 7},
- {2259, "ISO-8859-11", 11},
- {2259, "csTIS620", 8},
- {2260, "CP50220", 7},
- {2260, "csCP50220", 9},
+ {"", 1, 0},
+ {"", 2, 0},
+ {"ANSI_X3.4-1968", 3, 14},
+ {"ANSI_X3.4-1986", 3, 14},
+ {"IBM367", 3, 6},
+ {"ISO646-US", 3, 9},
+ {"ISO_646.irv:1991", 3, 16},
+ {"cp367", 3, 5},
+ {"csASCII", 3, 7},
+ {"iso-ir-6", 3, 8},
+ {"us", 3, 2},
+ {"ISO-8859-1", 4, 10},
+ {"ISO_8859-1:1987", 4, 15},
+ {"CP819", 4, 5},
+ {"IBM819", 4, 6},
+ {"ISO_8859-1", 4, 10},
+ {"csISOLatin1", 4, 11},
+ {"iso-ir-100", 4, 10},
+ {"l1", 4, 2},
+ {"latin1", 4, 6},
+ {"ISO-8859-2", 5, 10},
+ {"ISO_8859-2:1987", 5, 15},
+ {"ISO_8859-2", 5, 10},
+ {"csISOLatin2", 5, 11},
+ {"iso-ir-101", 5, 10},
+ {"l2", 5, 2},
+ {"latin2", 5, 6},
+ {"ISO-8859-3", 6, 10},
+ {"ISO_8859-3:1988", 6, 15},
+ {"ISO_8859-3", 6, 10},
+ {"csISOLatin3", 6, 11},
+ {"iso-ir-109", 6, 10},
+ {"l3", 6, 2},
+ {"latin3", 6, 6},
+ {"ISO-8859-4", 7, 10},
+ {"ISO_8859-4:1988", 7, 15},
+ {"ISO_8859-4", 7, 10},
+ {"csISOLatin4", 7, 11},
+ {"iso-ir-110", 7, 10},
+ {"l4", 7, 2},
+ {"latin4", 7, 6},
+ {"ISO-8859-5", 8, 10},
+ {"ISO_8859-5:1988", 8, 15},
+ {"ISO_8859-5", 8, 10},
+ {"csISOLatinCyrillic", 8, 18},
+ {"cyrillic", 8, 8},
+ {"iso-ir-144", 8, 10},
+ {"ISO-8859-6", 9, 10},
+ {"ISO_8859-6:1987", 9, 15},
+ {"ASMO-708", 9, 8},
+ {"ECMA-114", 9, 8},
+ {"ISO_8859-6", 9, 10},
+ {"arabic", 9, 6},
+ {"csISOLatinArabic", 9, 16},
+ {"iso-ir-127", 9, 10},
+ {"ISO-8859-7", 10, 10},
+ {"ISO_8859-7:1987", 10, 15},
+ {"ECMA-118", 10, 8},
+ {"ELOT_928", 10, 8},
+ {"ISO_8859-7", 10, 10},
+ {"csISOLatinGreek", 10, 15},
+ {"greek", 10, 5},
+ {"greek8", 10, 6},
+ {"iso-ir-126", 10, 10},
+ {"ISO-8859-8", 11, 10},
+ {"ISO_8859-8:1988", 11, 15},
+ {"ISO_8859-8", 11, 10},
+ {"csISOLatinHebrew", 11, 16},
+ {"hebrew", 11, 6},
+ {"iso-ir-138", 11, 10},
+ {"ISO-8859-9", 12, 10},
+ {"ISO_8859-9:1989", 12, 15},
+ {"ISO_8859-9", 12, 10},
+ {"csISOLatin5", 12, 11},
+ {"iso-ir-148", 12, 10},
+ {"l5", 12, 2},
+ {"latin5", 12, 6},
+ {"ISO-8859-10", 13, 11},
+ {"ISO_8859-10:1992", 13, 16},
+ {"csISOLatin6", 13, 11},
+ {"iso-ir-157", 13, 10},
+ {"l6", 13, 2},
+ {"latin6", 13, 6},
+ {"ISO_6937-2-add", 14, 14},
+ {"csISOTextComm", 14, 13},
+ {"iso-ir-142", 14, 10},
+ {"JIS_X0201", 15, 9},
+ {"X0201", 15, 5},
+ {"csHalfWidthKatakana", 15, 19},
+ {"JIS_Encoding", 16, 12},
+ {"csJISEncoding", 16, 13},
+ {"Shift_JIS", 17, 9},
+ {"MS_Kanji", 17, 8},
+ {"csShiftJIS", 17, 10},
+ {"EUC-JP", 18, 6},
+ {"Extended_UNIX_Code_Packed_Format_for_Japanese", 18, 45},
+ {"csEUCPkdFmtJapanese", 18, 19},
+ {"Extended_UNIX_Code_Fixed_Width_for_Japanese", 19, 43},
+ {"csEUCFixWidJapanese", 19, 19},
+ {"BS_4730", 20, 7},
+ {"ISO646-GB", 20, 9},
+ {"csISO4UnitedKingdom", 20, 19},
+ {"gb", 20, 2},
+ {"iso-ir-4", 20, 8},
+ {"uk", 20, 2},
+ {"SEN_850200_C", 21, 12},
+ {"ISO646-SE2", 21, 10},
+ {"csISO11SwedishForNames", 21, 22},
+ {"iso-ir-11", 21, 9},
+ {"se2", 21, 3},
+ {"IT", 22, 2},
+ {"ISO646-IT", 22, 9},
+ {"csISO15Italian", 22, 14},
+ {"iso-ir-15", 22, 9},
+ {"ES", 23, 2},
+ {"ISO646-ES", 23, 9},
+ {"csISO17Spanish", 23, 14},
+ {"iso-ir-17", 23, 9},
+ {"DIN_66003", 24, 9},
+ {"ISO646-DE", 24, 9},
+ {"csISO21German", 24, 13},
+ {"de", 24, 2},
+ {"iso-ir-21", 24, 9},
+ {"NS_4551-1", 25, 9},
+ {"ISO646-NO", 25, 9},
+ {"csISO60DanishNorwegian", 25, 22},
+ {"csISO60Norwegian1", 25, 17},
+ {"iso-ir-60", 25, 9},
+ {"no", 25, 2},
+ {"NF_Z_62-010", 26, 11},
+ {"ISO646-FR", 26, 9},
+ {"csISO69French", 26, 13},
+ {"fr", 26, 2},
+ {"iso-ir-69", 26, 9},
+ {"ISO-10646-UTF-1", 27, 15},
+ {"csISO10646UTF1", 27, 14},
+ {"ISO_646.basic:1983", 28, 18},
+ {"csISO646basic1983", 28, 17},
+ {"ref", 28, 3},
+ {"INVARIANT", 29, 9},
+ {"csINVARIANT", 29, 11},
+ {"ISO_646.irv:1983", 30, 16},
+ {"csISO2IntlRefVersion", 30, 20},
+ {"irv", 30, 3},
+ {"iso-ir-2", 30, 8},
+ {"NATS-SEFI", 31, 9},
+ {"csNATSSEFI", 31, 10},
+ {"iso-ir-8-1", 31, 10},
+ {"NATS-SEFI-ADD", 32, 13},
+ {"csNATSSEFIADD", 32, 13},
+ {"iso-ir-8-2", 32, 10},
+ {"SEN_850200_B", 35, 12},
+ {"FI", 35, 2},
+ {"ISO646-FI", 35, 9},
+ {"ISO646-SE", 35, 9},
+ {"csISO10Swedish", 35, 14},
+ {"iso-ir-10", 35, 9},
+ {"se", 35, 2},
+ {"KS_C_5601-1987", 36, 14},
+ {"KSC_5601", 36, 8},
+ {"KS_C_5601-1989", 36, 14},
+ {"csKSC56011987", 36, 13},
+ {"iso-ir-149", 36, 10},
+ {"korean", 36, 6},
+ {"ISO-2022-KR", 37, 11},
+ {"csISO2022KR", 37, 11},
+ {"EUC-KR", 38, 6},
+ {"csEUCKR", 38, 7},
+ {"ISO-2022-JP", 39, 11},
+ {"csISO2022JP", 39, 11},
+ {"ISO-2022-JP-2", 40, 13},
+ {"csISO2022JP2", 40, 12},
+ {"JIS_C6220-1969-jp", 41, 17},
+ {"JIS_C6220-1969", 41, 14},
+ {"csISO13JISC6220jp", 41, 17},
+ {"iso-ir-13", 41, 9},
+ {"katakana", 41, 8},
+ {"x0201-7", 41, 7},
+ {"JIS_C6220-1969-ro", 42, 17},
+ {"ISO646-JP", 42, 9},
+ {"csISO14JISC6220ro", 42, 17},
+ {"iso-ir-14", 42, 9},
+ {"jp", 42, 2},
+ {"PT", 43, 2},
+ {"ISO646-PT", 43, 9},
+ {"csISO16Portuguese", 43, 17},
+ {"iso-ir-16", 43, 9},
+ {"greek7-old", 44, 10},
+ {"csISO18Greek7Old", 44, 16},
+ {"iso-ir-18", 44, 9},
+ {"latin-greek", 45, 11},
+ {"csISO19LatinGreek", 45, 17},
+ {"iso-ir-19", 45, 9},
+ {"NF_Z_62-010_(1973)", 46, 18},
+ {"ISO646-FR1", 46, 10},
+ {"csISO25French", 46, 13},
+ {"iso-ir-25", 46, 9},
+ {"Latin-greek-1", 47, 13},
+ {"csISO27LatinGreek1", 47, 18},
+ {"iso-ir-27", 47, 9},
+ {"ISO_5427", 48, 8},
+ {"csISO5427Cyrillic", 48, 17},
+ {"iso-ir-37", 48, 9},
+ {"JIS_C6226-1978", 49, 14},
+ {"csISO42JISC62261978", 49, 19},
+ {"iso-ir-42", 49, 9},
+ {"BS_viewdata", 50, 11},
+ {"csISO47BSViewdata", 50, 17},
+ {"iso-ir-47", 50, 9},
+ {"INIS", 51, 4},
+ {"csISO49INIS", 51, 11},
+ {"iso-ir-49", 51, 9},
+ {"INIS-8", 52, 6},
+ {"csISO50INIS8", 52, 12},
+ {"iso-ir-50", 52, 9},
+ {"INIS-cyrillic", 53, 13},
+ {"csISO51INISCyrillic", 53, 19},
+ {"iso-ir-51", 53, 9},
+ {"ISO_5427:1981", 54, 13},
+ {"ISO5427Cyrillic1981", 54, 19},
+ {"csISO54271981", 54, 13},
+ {"iso-ir-54", 54, 9},
+ {"ISO_5428:1980", 55, 13},
+ {"csISO5428Greek", 55, 14},
+ {"iso-ir-55", 55, 9},
+ {"GB_1988-80", 56, 10},
+ {"ISO646-CN", 56, 9},
+ {"cn", 56, 2},
+ {"csISO57GB1988", 56, 13},
+ {"iso-ir-57", 56, 9},
+ {"GB_2312-80", 57, 10},
+ {"chinese", 57, 7},
+ {"csISO58GB231280", 57, 15},
+ {"iso-ir-58", 57, 9},
+ {"NS_4551-2", 58, 9},
+ {"ISO646-NO2", 58, 10},
+ {"csISO61Norwegian2", 58, 17},
+ {"iso-ir-61", 58, 9},
+ {"no2", 58, 3},
+ {"videotex-suppl", 59, 14},
+ {"csISO70VideotexSupp1", 59, 20},
+ {"iso-ir-70", 59, 9},
+ {"PT2", 60, 3},
+ {"ISO646-PT2", 60, 10},
+ {"csISO84Portuguese2", 60, 18},
+ {"iso-ir-84", 60, 9},
+ {"ES2", 61, 3},
+ {"ISO646-ES2", 61, 10},
+ {"csISO85Spanish2", 61, 15},
+ {"iso-ir-85", 61, 9},
+ {"MSZ_7795.3", 62, 10},
+ {"ISO646-HU", 62, 9},
+ {"csISO86Hungarian", 62, 16},
+ {"hu", 62, 2},
+ {"iso-ir-86", 62, 9},
+ {"JIS_C6226-1983", 63, 14},
+ {"JIS_X0208-1983", 63, 14},
+ {"csISO87JISX0208", 63, 15},
+ {"iso-ir-87", 63, 9},
+ {"x0208", 63, 5},
+ {"greek7", 64, 6},
+ {"csISO88Greek7", 64, 13},
+ {"iso-ir-88", 64, 9},
+ {"ASMO_449", 65, 8},
+ {"ISO_9036", 65, 8},
+ {"arabic7", 65, 7},
+ {"csISO89ASMO449", 65, 14},
+ {"iso-ir-89", 65, 9},
+ {"iso-ir-90", 66, 9},
+ {"csISO90", 66, 7},
+ {"JIS_C6229-1984-a", 67, 16},
+ {"csISO91JISC62291984a", 67, 20},
+ {"iso-ir-91", 67, 9},
+ {"jp-ocr-a", 67, 8},
+ {"JIS_C6229-1984-b", 68, 16},
+ {"ISO646-JP-OCR-B", 68, 15},
+ {"csISO92JISC62991984b", 68, 20},
+ {"iso-ir-92", 68, 9},
+ {"jp-ocr-b", 68, 8},
+ {"JIS_C6229-1984-b-add", 69, 20},
+ {"csISO93JIS62291984badd", 69, 22},
+ {"iso-ir-93", 69, 9},
+ {"jp-ocr-b-add", 69, 12},
+ {"JIS_C6229-1984-hand", 70, 19},
+ {"csISO94JIS62291984hand", 70, 22},
+ {"iso-ir-94", 70, 9},
+ {"jp-ocr-hand", 70, 11},
+ {"JIS_C6229-1984-hand-add", 71, 23},
+ {"csISO95JIS62291984handadd", 71, 25},
+ {"iso-ir-95", 71, 9},
+ {"jp-ocr-hand-add", 71, 15},
+ {"JIS_C6229-1984-kana", 72, 19},
+ {"csISO96JISC62291984kana", 72, 23},
+ {"iso-ir-96", 72, 9},
+ {"ISO_2033-1983", 73, 13},
+ {"csISO2033", 73, 9},
+ {"e13b", 73, 4},
+ {"iso-ir-98", 73, 9},
+ {"ANSI_X3.110-1983", 74, 16},
+ {"CSA_T500-1983", 74, 13},
+ {"NAPLPS", 74, 6},
+ {"csISO99NAPLPS", 74, 13},
+ {"iso-ir-99", 74, 9},
+ {"T.61-7bit", 75, 9},
+ {"csISO102T617bit", 75, 15},
+ {"iso-ir-102", 75, 10},
+ {"T.61-8bit", 76, 9},
+ {"T.61", 76, 4},
+ {"csISO103T618bit", 76, 15},
+ {"iso-ir-103", 76, 10},
+ {"ECMA-cyrillic", 77, 13},
+ {"KOI8-E", 77, 6},
+ {"csISO111ECMACyrillic", 77, 20},
+ {"iso-ir-111", 77, 10},
+ {"CSA_Z243.4-1985-1", 78, 17},
+ {"ISO646-CA", 78, 9},
+ {"ca", 78, 2},
+ {"csISO121Canadian1", 78, 17},
+ {"csa7-1", 78, 6},
+ {"csa71", 78, 5},
+ {"iso-ir-121", 78, 10},
+ {"CSA_Z243.4-1985-2", 79, 17},
+ {"ISO646-CA2", 79, 10},
+ {"csISO122Canadian2", 79, 17},
+ {"csa7-2", 79, 6},
+ {"csa72", 79, 5},
+ {"iso-ir-122", 79, 10},
+ {"CSA_Z243.4-1985-gr", 80, 18},
+ {"csISO123CSAZ24341985gr", 80, 22},
+ {"iso-ir-123", 80, 10},
+ {"ISO-8859-6-E", 81, 12},
+ {"ISO_8859-6-E", 81, 12},
+ {"csISO88596E", 81, 11},
+ {"ISO-8859-6-I", 82, 12},
+ {"ISO_8859-6-I", 82, 12},
+ {"csISO88596I", 82, 11},
+ {"T.101-G2", 83, 8},
+ {"csISO128T101G2", 83, 14},
+ {"iso-ir-128", 83, 10},
+ {"ISO-8859-8-E", 84, 12},
+ {"ISO_8859-8-E", 84, 12},
+ {"csISO88598E", 84, 11},
+ {"ISO-8859-8-I", 85, 12},
+ {"ISO_8859-8-I", 85, 12},
+ {"csISO88598I", 85, 11},
+ {"CSN_369103", 86, 10},
+ {"csISO139CSN369103", 86, 17},
+ {"iso-ir-139", 86, 10},
+ {"JUS_I.B1.002", 87, 12},
+ {"ISO646-YU", 87, 9},
+ {"csISO141JUSIB1002", 87, 17},
+ {"iso-ir-141", 87, 10},
+ {"js", 87, 2},
+ {"yu", 87, 2},
+ {"IEC_P27-1", 88, 9},
+ {"csISO143IECP271", 88, 15},
+ {"iso-ir-143", 88, 10},
+ {"JUS_I.B1.003-serb", 89, 17},
+ {"csISO146Serbian", 89, 15},
+ {"iso-ir-146", 89, 10},
+ {"serbian", 89, 7},
+ {"JUS_I.B1.003-mac", 90, 16},
+ {"csISO147Macedonian", 90, 18},
+ {"iso-ir-147", 90, 10},
+ {"macedonian", 90, 10},
+ {"greek-ccitt", 91, 11},
+ {"csISO150", 91, 8},
+ {"csISO150GreekCCITT", 91, 18},
+ {"iso-ir-150", 91, 10},
+ {"NC_NC00-10:81", 92, 13},
+ {"ISO646-CU", 92, 9},
+ {"csISO151Cuba", 92, 12},
+ {"cuba", 92, 4},
+ {"iso-ir-151", 92, 10},
+ {"ISO_6937-2-25", 93, 13},
+ {"csISO6937Add", 93, 12},
+ {"iso-ir-152", 93, 10},
+ {"GOST_19768-74", 94, 13},
+ {"ST_SEV_358-88", 94, 13},
+ {"csISO153GOST1976874", 94, 19},
+ {"iso-ir-153", 94, 10},
+ {"ISO_8859-supp", 95, 13},
+ {"csISO8859Supp", 95, 13},
+ {"iso-ir-154", 95, 10},
+ {"latin1-2-5", 95, 10},
+ {"ISO_10367-box", 96, 13},
+ {"csISO10367Box", 96, 13},
+ {"iso-ir-155", 96, 10},
+ {"latin-lap", 97, 9},
+ {"csISO158Lap", 97, 11},
+ {"iso-ir-158", 97, 10},
+ {"lap", 97, 3},
+ {"JIS_X0212-1990", 98, 14},
+ {"csISO159JISX02121990", 98, 20},
+ {"iso-ir-159", 98, 10},
+ {"x0212", 98, 5},
+ {"DS_2089", 99, 7},
+ {"DS2089", 99, 6},
+ {"ISO646-DK", 99, 9},
+ {"csISO646Danish", 99, 14},
+ {"dk", 99, 2},
+ {"us-dk", 100, 5},
+ {"csUSDK", 100, 6},
+ {"dk-us", 101, 5},
+ {"csDKUS", 101, 6},
+ {"KSC5636", 102, 7},
+ {"ISO646-KR", 102, 9},
+ {"csKSC5636", 102, 9},
+ {"UNICODE-1-1-UTF-7", 103, 17},
+ {"csUnicode11UTF7", 103, 15},
+ {"ISO-2022-CN", 104, 11},
+ {"csISO2022CN", 104, 11},
+ {"ISO-2022-CN-EXT", 105, 15},
+ {"csISO2022CNEXT", 105, 14},
+ {"UTF-8", 106, 5},
+ {"csUTF8", 106, 6},
+ {"ISO-8859-13", 109, 11},
+ {"csISO885913", 109, 11},
+ {"ISO-8859-14", 110, 11},
+ {"ISO_8859-14", 110, 11},
+ {"ISO_8859-14:1998", 110, 16},
+ {"csISO885914", 110, 11},
+ {"iso-celtic", 110, 10},
+ {"iso-ir-199", 110, 10},
+ {"l8", 110, 2},
+ {"latin8", 110, 6},
+ {"ISO-8859-15", 111, 11},
+ {"ISO_8859-15", 111, 11},
+ {"Latin-9", 111, 7},
+ {"csISO885915", 111, 11},
+ {"ISO-8859-16", 112, 11},
+ {"ISO_8859-16", 112, 11},
+ {"ISO_8859-16:2001", 112, 16},
+ {"csISO885916", 112, 11},
+ {"iso-ir-226", 112, 10},
+ {"l10", 112, 3},
+ {"latin10", 112, 7},
+ {"GBK", 113, 3},
+ {"CP936", 113, 5},
+ {"MS936", 113, 5},
+ {"csGBK", 113, 5},
+ {"windows-936", 113, 11},
+ {"GB18030", 114, 7},
+ {"csGB18030", 114, 9},
+ {"OSD_EBCDIC_DF04_15", 115, 18},
+ {"csOSDEBCDICDF0415", 115, 17},
+ {"OSD_EBCDIC_DF03_IRV", 116, 19},
+ {"csOSDEBCDICDF03IRV", 116, 18},
+ {"OSD_EBCDIC_DF04_1", 117, 17},
+ {"csOSDEBCDICDF041", 117, 16},
+ {"ISO-11548-1", 118, 11},
+ {"ISO_11548-1", 118, 11},
+ {"ISO_TR_11548-1", 118, 14},
+ {"csISO115481", 118, 11},
+ {"KZ-1048", 119, 7},
+ {"RK1048", 119, 6},
+ {"STRK1048-2002", 119, 13},
+ {"csKZ1048", 119, 8},
+ {"ISO-10646-UCS-2", 1000, 15},
+ {"csUnicode", 1000, 9},
+ {"ISO-10646-UCS-4", 1001, 15},
+ {"csUCS4", 1001, 6},
+ {"ISO-10646-UCS-Basic", 1002, 19},
+ {"csUnicodeASCII", 1002, 14},
+ {"ISO-10646-Unicode-Latin1", 1003, 24},
+ {"ISO-10646", 1003, 9},
+ {"csUnicodeLatin1", 1003, 15},
+ {"ISO-10646-J-1", 1004, 13},
+ {"csUnicodeJapanese", 1004, 17},
+ {"ISO-Unicode-IBM-1261", 1005, 20},
+ {"csUnicodeIBM1261", 1005, 16},
+ {"ISO-Unicode-IBM-1268", 1006, 20},
+ {"csUnicodeIBM1268", 1006, 16},
+ {"ISO-Unicode-IBM-1276", 1007, 20},
+ {"csUnicodeIBM1276", 1007, 16},
+ {"ISO-Unicode-IBM-1264", 1008, 20},
+ {"csUnicodeIBM1264", 1008, 16},
+ {"ISO-Unicode-IBM-1265", 1009, 20},
+ {"csUnicodeIBM1265", 1009, 16},
+ {"UNICODE-1-1", 1010, 11},
+ {"csUnicode11", 1010, 11},
+ {"SCSU", 1011, 4},
+ {"csSCSU", 1011, 6},
+ {"UTF-7", 1012, 5},
+ {"csUTF7", 1012, 6},
+ {"UTF-16BE", 1013, 8},
+ {"csUTF16BE", 1013, 9},
+ {"UTF-16LE", 1014, 8},
+ {"csUTF16LE", 1014, 9},
+ {"UTF-16", 1015, 6},
+ {"csUTF16", 1015, 7},
+ {"CESU-8", 1016, 6},
+ {"csCESU-8", 1016, 8},
+ {"csCESU8", 1016, 7},
+ {"UTF-32", 1017, 6},
+ {"csUTF32", 1017, 7},
+ {"UTF-32BE", 1018, 8},
+ {"csUTF32BE", 1018, 9},
+ {"UTF-32LE", 1019, 8},
+ {"csUTF32LE", 1019, 9},
+ {"BOCU-1", 1020, 6},
+ {"csBOCU-1", 1020, 8},
+ {"csBOCU1", 1020, 7},
+ {"UTF-7-IMAP", 1021, 10},
+ {"csUTF7IMAP", 1021, 10},
+ {"ISO-8859-1-Windows-3.0-Latin-1", 2000, 30},
+ {"csWindows30Latin1", 2000, 17},
+ {"ISO-8859-1-Windows-3.1-Latin-1", 2001, 30},
+ {"csWindows31Latin1", 2001, 17},
+ {"ISO-8859-2-Windows-Latin-2", 2002, 26},
+ {"csWindows31Latin2", 2002, 17},
+ {"ISO-8859-9-Windows-Latin-5", 2003, 26},
+ {"csWindows31Latin5", 2003, 17},
+ {"hp-roman8", 2004, 9},
+ {"csHPRoman8", 2004, 10},
+ {"r8", 2004, 2},
+ {"roman8", 2004, 6},
+ {"Adobe-Standard-Encoding", 2005, 23},
+ {"csAdobeStandardEncoding", 2005, 23},
+ {"Ventura-US", 2006, 10},
+ {"csVenturaUS", 2006, 11},
+ {"Ventura-International", 2007, 21},
+ {"csVenturaInternational", 2007, 22},
+ {"DEC-MCS", 2008, 7},
+ {"csDECMCS", 2008, 8},
+ {"dec", 2008, 3},
+ {"IBM850", 2009, 6},
+ {"850", 2009, 3},
+ {"cp850", 2009, 5},
+ {"csPC850Multilingual", 2009, 19},
+ {"IBM852", 2010, 6},
+ {"852", 2010, 3},
+ {"cp852", 2010, 5},
+ {"csPCp852", 2010, 8},
+ {"IBM437", 2011, 6},
+ {"437", 2011, 3},
+ {"cp437", 2011, 5},
+ {"csPC8CodePage437", 2011, 16},
+ {"PC8-Danish-Norwegian", 2012, 20},
+ {"csPC8DanishNorwegian", 2012, 20},
+ {"IBM862", 2013, 6},
+ {"862", 2013, 3},
+ {"cp862", 2013, 5},
+ {"csPC862LatinHebrew", 2013, 18},
+ {"PC8-Turkish", 2014, 11},
+ {"csPC8Turkish", 2014, 12},
+ {"IBM-Symbols", 2015, 11},
+ {"csIBMSymbols", 2015, 12},
+ {"IBM-Thai", 2016, 8},
+ {"csIBMThai", 2016, 9},
+ {"HP-Legal", 2017, 8},
+ {"csHPLegal", 2017, 9},
+ {"HP-Pi-font", 2018, 10},
+ {"csHPPiFont", 2018, 10},
+ {"HP-Math8", 2019, 8},
+ {"csHPMath8", 2019, 9},
+ {"Adobe-Symbol-Encoding", 2020, 21},
+ {"csHPPSMath", 2020, 10},
+ {"HP-DeskTop", 2021, 10},
+ {"csHPDesktop", 2021, 11},
+ {"Ventura-Math", 2022, 12},
+ {"csVenturaMath", 2022, 13},
+ {"Microsoft-Publishing", 2023, 20},
+ {"csMicrosoftPublishing", 2023, 21},
+ {"Windows-31J", 2024, 11},
+ {"csWindows31J", 2024, 12},
+ {"GB2312", 2025, 6},
+ {"csGB2312", 2025, 8},
+ {"Big5", 2026, 4},
+ {"csBig5", 2026, 6},
+ {"macintosh", 2027, 9},
+ {"csMacintosh", 2027, 11},
+ {"mac", 2027, 3},
+ {"IBM037", 2028, 6},
+ {"cp037", 2028, 5},
+ {"csIBM037", 2028, 8},
+ {"ebcdic-cp-ca", 2028, 12},
+ {"ebcdic-cp-nl", 2028, 12},
+ {"ebcdic-cp-us", 2028, 12},
+ {"ebcdic-cp-wt", 2028, 12},
+ {"IBM038", 2029, 6},
+ {"EBCDIC-INT", 2029, 10},
+ {"cp038", 2029, 5},
+ {"csIBM038", 2029, 8},
+ {"IBM273", 2030, 6},
+ {"CP273", 2030, 5},
+ {"csIBM273", 2030, 8},
+ {"IBM274", 2031, 6},
+ {"CP274", 2031, 5},
+ {"EBCDIC-BE", 2031, 9},
+ {"csIBM274", 2031, 8},
+ {"IBM275", 2032, 6},
+ {"EBCDIC-BR", 2032, 9},
+ {"cp275", 2032, 5},
+ {"csIBM275", 2032, 8},
+ {"IBM277", 2033, 6},
+ {"EBCDIC-CP-DK", 2033, 12},
+ {"EBCDIC-CP-NO", 2033, 12},
+ {"csIBM277", 2033, 8},
+ {"IBM278", 2034, 6},
+ {"CP278", 2034, 5},
+ {"csIBM278", 2034, 8},
+ {"ebcdic-cp-fi", 2034, 12},
+ {"ebcdic-cp-se", 2034, 12},
+ {"IBM280", 2035, 6},
+ {"CP280", 2035, 5},
+ {"csIBM280", 2035, 8},
+ {"ebcdic-cp-it", 2035, 12},
+ {"IBM281", 2036, 6},
+ {"EBCDIC-JP-E", 2036, 11},
+ {"cp281", 2036, 5},
+ {"csIBM281", 2036, 8},
+ {"IBM284", 2037, 6},
+ {"CP284", 2037, 5},
+ {"csIBM284", 2037, 8},
+ {"ebcdic-cp-es", 2037, 12},
+ {"IBM285", 2038, 6},
+ {"CP285", 2038, 5},
+ {"csIBM285", 2038, 8},
+ {"ebcdic-cp-gb", 2038, 12},
+ {"IBM290", 2039, 6},
+ {"EBCDIC-JP-kana", 2039, 14},
+ {"cp290", 2039, 5},
+ {"csIBM290", 2039, 8},
+ {"IBM297", 2040, 6},
+ {"cp297", 2040, 5},
+ {"csIBM297", 2040, 8},
+ {"ebcdic-cp-fr", 2040, 12},
+ {"IBM420", 2041, 6},
+ {"cp420", 2041, 5},
+ {"csIBM420", 2041, 8},
+ {"ebcdic-cp-ar1", 2041, 13},
+ {"IBM423", 2042, 6},
+ {"cp423", 2042, 5},
+ {"csIBM423", 2042, 8},
+ {"ebcdic-cp-gr", 2042, 12},
+ {"IBM424", 2043, 6},
+ {"cp424", 2043, 5},
+ {"csIBM424", 2043, 8},
+ {"ebcdic-cp-he", 2043, 12},
+ {"IBM500", 2044, 6},
+ {"CP500", 2044, 5},
+ {"csIBM500", 2044, 8},
+ {"ebcdic-cp-be", 2044, 12},
+ {"ebcdic-cp-ch", 2044, 12},
+ {"IBM851", 2045, 6},
+ {"851", 2045, 3},
+ {"cp851", 2045, 5},
+ {"csIBM851", 2045, 8},
+ {"IBM855", 2046, 6},
+ {"855", 2046, 3},
+ {"cp855", 2046, 5},
+ {"csIBM855", 2046, 8},
+ {"IBM857", 2047, 6},
+ {"857", 2047, 3},
+ {"cp857", 2047, 5},
+ {"csIBM857", 2047, 8},
+ {"IBM860", 2048, 6},
+ {"860", 2048, 3},
+ {"cp860", 2048, 5},
+ {"csIBM860", 2048, 8},
+ {"IBM861", 2049, 6},
+ {"861", 2049, 3},
+ {"cp-is", 2049, 5},
+ {"cp861", 2049, 5},
+ {"csIBM861", 2049, 8},
+ {"IBM863", 2050, 6},
+ {"863", 2050, 3},
+ {"cp863", 2050, 5},
+ {"csIBM863", 2050, 8},
+ {"IBM864", 2051, 6},
+ {"cp864", 2051, 5},
+ {"csIBM864", 2051, 8},
+ {"IBM865", 2052, 6},
+ {"865", 2052, 3},
+ {"cp865", 2052, 5},
+ {"csIBM865", 2052, 8},
+ {"IBM868", 2053, 6},
+ {"CP868", 2053, 5},
+ {"cp-ar", 2053, 5},
+ {"csIBM868", 2053, 8},
+ {"IBM869", 2054, 6},
+ {"869", 2054, 3},
+ {"cp-gr", 2054, 5},
+ {"cp869", 2054, 5},
+ {"csIBM869", 2054, 8},
+ {"IBM870", 2055, 6},
+ {"CP870", 2055, 5},
+ {"csIBM870", 2055, 8},
+ {"ebcdic-cp-roece", 2055, 15},
+ {"ebcdic-cp-yu", 2055, 12},
+ {"IBM871", 2056, 6},
+ {"CP871", 2056, 5},
+ {"csIBM871", 2056, 8},
+ {"ebcdic-cp-is", 2056, 12},
+ {"IBM880", 2057, 6},
+ {"EBCDIC-Cyrillic", 2057, 15},
+ {"cp880", 2057, 5},
+ {"csIBM880", 2057, 8},
+ {"IBM891", 2058, 6},
+ {"cp891", 2058, 5},
+ {"csIBM891", 2058, 8},
+ {"IBM903", 2059, 6},
+ {"cp903", 2059, 5},
+ {"csIBM903", 2059, 8},
+ {"IBM904", 2060, 6},
+ {"904", 2060, 3},
+ {"cp904", 2060, 5},
+ {"csIBBM904", 2060, 9},
+ {"IBM905", 2061, 6},
+ {"CP905", 2061, 5},
+ {"csIBM905", 2061, 8},
+ {"ebcdic-cp-tr", 2061, 12},
+ {"IBM918", 2062, 6},
+ {"CP918", 2062, 5},
+ {"csIBM918", 2062, 8},
+ {"ebcdic-cp-ar2", 2062, 13},
+ {"IBM1026", 2063, 7},
+ {"CP1026", 2063, 6},
+ {"csIBM1026", 2063, 9},
+ {"EBCDIC-AT-DE", 2064, 12},
+ {"csIBMEBCDICATDE", 2064, 15},
+ {"EBCDIC-AT-DE-A", 2065, 14},
+ {"csEBCDICATDEA", 2065, 13},
+ {"EBCDIC-CA-FR", 2066, 12},
+ {"csEBCDICCAFR", 2066, 12},
+ {"EBCDIC-DK-NO", 2067, 12},
+ {"csEBCDICDKNO", 2067, 12},
+ {"EBCDIC-DK-NO-A", 2068, 14},
+ {"csEBCDICDKNOA", 2068, 13},
+ {"EBCDIC-FI-SE", 2069, 12},
+ {"csEBCDICFISE", 2069, 12},
+ {"EBCDIC-FI-SE-A", 2070, 14},
+ {"csEBCDICFISEA", 2070, 13},
+ {"EBCDIC-FR", 2071, 9},
+ {"csEBCDICFR", 2071, 10},
+ {"EBCDIC-IT", 2072, 9},
+ {"csEBCDICIT", 2072, 10},
+ {"EBCDIC-PT", 2073, 9},
+ {"csEBCDICPT", 2073, 10},
+ {"EBCDIC-ES", 2074, 9},
+ {"csEBCDICES", 2074, 10},
+ {"EBCDIC-ES-A", 2075, 11},
+ {"csEBCDICESA", 2075, 11},
+ {"EBCDIC-ES-S", 2076, 11},
+ {"csEBCDICESS", 2076, 11},
+ {"EBCDIC-UK", 2077, 9},
+ {"csEBCDICUK", 2077, 10},
+ {"EBCDIC-US", 2078, 9},
+ {"csEBCDICUS", 2078, 10},
+ {"UNKNOWN-8BIT", 2079, 12},
+ {"csUnknown8BiT", 2079, 13},
+ {"MNEMONIC", 2080, 8},
+ {"csMnemonic", 2080, 10},
+ {"MNEM", 2081, 4},
+ {"csMnem", 2081, 6},
+ {"VISCII", 2082, 6},
+ {"csVISCII", 2082, 8},
+ {"VIQR", 2083, 4},
+ {"csVIQR", 2083, 6},
+ {"KOI8-R", 2084, 6},
+ {"csKOI8R", 2084, 7},
+ {"HZ-GB-2312", 2085, 10},
+ {"IBM866", 2086, 6},
+ {"866", 2086, 3},
+ {"cp866", 2086, 5},
+ {"csIBM866", 2086, 8},
+ {"IBM775", 2087, 6},
+ {"cp775", 2087, 5},
+ {"csPC775Baltic", 2087, 13},
+ {"KOI8-U", 2088, 6},
+ {"csKOI8U", 2088, 7},
+ {"IBM00858", 2089, 8},
+ {"CCSID00858", 2089, 10},
+ {"CP00858", 2089, 7},
+ {"PC-Multilingual-850+euro", 2089, 24},
+ {"csIBM00858", 2089, 10},
+ {"IBM00924", 2090, 8},
+ {"CCSID00924", 2090, 10},
+ {"CP00924", 2090, 7},
+ {"csIBM00924", 2090, 10},
+ {"ebcdic-Latin9--euro", 2090, 19},
+ {"IBM01140", 2091, 8},
+ {"CCSID01140", 2091, 10},
+ {"CP01140", 2091, 7},
+ {"csIBM01140", 2091, 10},
+ {"ebcdic-us-37+euro", 2091, 17},
+ {"IBM01141", 2092, 8},
+ {"CCSID01141", 2092, 10},
+ {"CP01141", 2092, 7},
+ {"csIBM01141", 2092, 10},
+ {"ebcdic-de-273+euro", 2092, 18},
+ {"IBM01142", 2093, 8},
+ {"CCSID01142", 2093, 10},
+ {"CP01142", 2093, 7},
+ {"csIBM01142", 2093, 10},
+ {"ebcdic-dk-277+euro", 2093, 18},
+ {"ebcdic-no-277+euro", 2093, 18},
+ {"IBM01143", 2094, 8},
+ {"CCSID01143", 2094, 10},
+ {"CP01143", 2094, 7},
+ {"csIBM01143", 2094, 10},
+ {"ebcdic-fi-278+euro", 2094, 18},
+ {"ebcdic-se-278+euro", 2094, 18},
+ {"IBM01144", 2095, 8},
+ {"CCSID01144", 2095, 10},
+ {"CP01144", 2095, 7},
+ {"csIBM01144", 2095, 10},
+ {"ebcdic-it-280+euro", 2095, 18},
+ {"IBM01145", 2096, 8},
+ {"CCSID01145", 2096, 10},
+ {"CP01145", 2096, 7},
+ {"csIBM01145", 2096, 10},
+ {"ebcdic-es-284+euro", 2096, 18},
+ {"IBM01146", 2097, 8},
+ {"CCSID01146", 2097, 10},
+ {"CP01146", 2097, 7},
+ {"csIBM01146", 2097, 10},
+ {"ebcdic-gb-285+euro", 2097, 18},
+ {"IBM01147", 2098, 8},
+ {"CCSID01147", 2098, 10},
+ {"CP01147", 2098, 7},
+ {"csIBM01147", 2098, 10},
+ {"ebcdic-fr-297+euro", 2098, 18},
+ {"IBM01148", 2099, 8},
+ {"CCSID01148", 2099, 10},
+ {"CP01148", 2099, 7},
+ {"csIBM01148", 2099, 10},
+ {"ebcdic-international-500+euro", 2099, 29},
+ {"IBM01149", 2100, 8},
+ {"CCSID01149", 2100, 10},
+ {"CP01149", 2100, 7},
+ {"csIBM01149", 2100, 10},
+ {"ebcdic-is-871+euro", 2100, 18},
+ {"Big5-HKSCS", 2101, 10},
+ {"csBig5HKSCS", 2101, 11},
+ {"IBM1047", 2102, 7},
+ {"IBM-1047", 2102, 8},
+ {"csIBM1047", 2102, 9},
+ {"PTCP154", 2103, 7},
+ {"CP154", 2103, 5},
+ {"Cyrillic-Asian", 2103, 14},
+ {"PT154", 2103, 5},
+ {"csPTCP154", 2103, 9},
+ {"Amiga-1251", 2104, 10},
+ {"Ami-1251", 2104, 8},
+ {"Ami1251", 2104, 7},
+ {"Amiga1251", 2104, 9},
+ {"csAmiga1251", 2104, 11},
+ {"KOI7-switched", 2105, 13},
+ {"csKOI7switched", 2105, 14},
+ {"BRF", 2106, 3},
+ {"csBRF", 2106, 5},
+ {"TSCII", 2107, 5},
+ {"csTSCII", 2107, 7},
+ {"CP51932", 2108, 7},
+ {"csCP51932", 2108, 9},
+ {"windows-874", 2109, 11},
+ {"cswindows874", 2109, 12},
+ {"windows-1250", 2250, 12},
+ {"cswindows1250", 2250, 13},
+ {"windows-1251", 2251, 12},
+ {"cswindows1251", 2251, 13},
+ {"windows-1252", 2252, 12},
+ {"cswindows1252", 2252, 13},
+ {"windows-1253", 2253, 12},
+ {"cswindows1253", 2253, 13},
+ {"windows-1254", 2254, 12},
+ {"cswindows1254", 2254, 13},
+ {"windows-1255", 2255, 12},
+ {"cswindows1255", 2255, 13},
+ {"windows-1256", 2256, 12},
+ {"cswindows1256", 2256, 13},
+ {"windows-1257", 2257, 12},
+ {"cswindows1257", 2257, 13},
+ {"windows-1258", 2258, 12},
+ {"cswindows1258", 2258, 13},
+ {"TIS-620", 2259, 7},
+ {"ISO-8859-11", 2259, 11},
+ {"csTIS620", 2259, 8},
+ {"CP50220", 2260, 7},
+ {"csCP50220", 2260, 9},
};
const __encoding_data* __encoding_rep_ = __text_encoding_data + 1;
>From 9ae9a2ad69165fc10823dd3042236aaff670c3d3 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:08:31 -0400
Subject: [PATCH 49/54] Make sure to walk backwards to get primary name when
searching by name
---
libcxx/include/__text_encoding/text_encoding.h | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index b4f3d7d5c01db..eb81212c95ba6 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -549,7 +549,15 @@ struct text_encoding {
auto __data_ptr = __text_encoding_data + 2, __data_last = std::end(__text_encoding_data);
auto __found_data = std::find(__data_ptr, __data_last, __a);
- return __found_data != __data_last ? __found_data : __text_encoding_data; // other
+ if (__found_data == __data_last) {
+ return __text_encoding_data; // other
+ }
+
+ while (__found_data[-1].__mib_rep_ == __found_data->__mib_rep_) {
+ __found_data--;
+ }
+
+ return __found_data;
}
_LIBCPP_HIDE_FROM_ABI static constexpr const __encoding_data* __find_encoding_data_by_id(id __i) {
>From bd145a4b53f364b6b6a5708ed80279b7f8105871 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:12:17 -0400
Subject: [PATCH 50/54] No need to walk backwards when building aliases_view
since we should already be on the primary encoding name
---
libcxx/include/__text_encoding/text_encoding.h | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index eb81212c95ba6..4b0a900bedb67 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -458,16 +458,9 @@ struct text_encoding {
};
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr aliases_view aliases() const noexcept {
- auto __rep = __encoding_rep_;
- if (__encoding_rep_->__name_[0]) {
- while (__rep > std::begin(__text_encoding_data) && __rep[-1].__mib_rep_ == __encoding_rep_->__mib_rep_) {
- __rep--;
- }
- } else {
- __rep = nullptr;
- }
-
- return aliases_view(__rep);
+ if (!__encoding_rep_->__name_[0])
+ return aliases_view(nullptr);
+ return aliases_view(__encoding_rep_);
}
_LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const text_encoding& __a, const text_encoding& __b) noexcept {
>From dfc737f3e5b52a7e2551427d9814d49e2db46062 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:27:53 -0400
Subject: [PATCH 51/54] Update transitive includes
---
libcxx/test/libcxx/transitive_includes/cxx26.csv | 1 -
1 file changed, 1 deletion(-)
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index a795b872a6646..b93e86e74f537 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -1069,7 +1069,6 @@ text_encoding cctype
text_encoding compare
text_encoding cstdint
text_encoding cstdio
-text_encoding cstdlib
text_encoding cstring
text_encoding cwchar
text_encoding cwctype
>From f05e695bfa0245a98f6338c8fd40997a35acbcd9 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:30:54 -0400
Subject: [PATCH 52/54] Put in a missing ranges include
---
libcxx/include/__text_encoding/text_encoding.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/libcxx/include/__text_encoding/text_encoding.h b/libcxx/include/__text_encoding/text_encoding.h
index 4b0a900bedb67..a518716f5facc 100644
--- a/libcxx/include/__text_encoding/text_encoding.h
+++ b/libcxx/include/__text_encoding/text_encoding.h
@@ -23,6 +23,7 @@
#include <__assert>
#include <__functional/hash.h>
#include <__iterator/iterator_traits.h>
+#include <__ranges/enable_borrowed_range.h>
#include <__ranges/view_interface.h>
#include <__string/char_traits.h>
#include <__text_encoding/get_locale_encoding.h>
>From 1117fa3fd1b011e7f7e5aa7c76b67cebc8f466ba Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:58:00 -0400
Subject: [PATCH 53/54] Properly fill out text_encoding.inc
---
libcxx/modules/std/text_encoding.inc | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/libcxx/modules/std/text_encoding.inc b/libcxx/modules/std/text_encoding.inc
index c425b98daa5a0..8ad1e7bfced43 100644
--- a/libcxx/modules/std/text_encoding.inc
+++ b/libcxx/modules/std/text_encoding.inc
@@ -9,8 +9,10 @@
export namespace std {
#if _LIBCPP_STD_VER >= 26
- using ::std::hash;
- using ::std::ranges::enable_borrowed_range;
- using ::std::text_encoding;
+ using std::hash;
+ namespace ranges {
+ using std::ranges::enable_borrowed_range;
+ }
+ using std::text_encoding;
#endif
} // namespace std
>From 822acd5f9345f0e73d69e4a39e09c85d24e889c0 Mon Sep 17 00:00:00 2001
From: William Tran-Viet <wtranviet at proton.me>
Date: Wed, 28 May 2025 23:58:17 -0400
Subject: [PATCH 54/54] Update environment.pass.cpp to check for the environment's default locale instead of assuming the "C" locale
---
.../environment.pass.cpp | 49 ++++++++++++-------
1 file changed, 30 insertions(+), 19 deletions(-)
diff --git a/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp b/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
index 2aaa6859ad6a9..2c5eacda3b92b 100644
--- a/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
+++ b/libcxx/test/std/utilities/text_encoding/text_encoding.members/environment.pass.cpp
@@ -19,8 +19,8 @@
// text_encoding text_encoding::environment();
// Concerns:
-// 1. text_encoding::environment() returns the encoding for the "C" locale, which should be the default for any C++ program.
-// 2. text_encoding::environment() still returns the "C" locale encoding when the locale is set to "en_US.UTF-8".
+// 1. text_encoding::environment() returns the encoding named by the codeset part of the locale reported by std::setlocale(LC_ALL, nullptr), or ASCII when that name has no codeset.
+// 2. text_encoding::environment() still returns the default locale encoding when the locale is set to "en_US.UTF-8".
// 3. text_encoding::environment() is affected by changes to the "LANG" environment variable.
// The current implementation of text_encoding::environment() while conformant,
@@ -29,6 +29,7 @@
#include <cassert>
#include <clocale>
#include <cstdlib>
+#include <string>
#include <string_view>
#include <text_encoding>
@@ -36,30 +37,40 @@
#include "test_macros.h"
#include "test_text_encoding.h"
-int main() {
- { // 1
- auto te = std::text_encoding::environment();
+std::string extractEncodingFromLocale(std::string locale_str) {
+ auto dot_pos = locale_str.find('.'), at_pos = locale_str.find('@');
- assert(te == std::text_encoding::environment());
- assert(te.mib() == std::text_encoding::id::ASCII);
- assert(te == std::text_encoding::id::ASCII);
- assert(std::string_view(te.name()) == "ANSI_X3.4-1968");
- assert(te == std::text_encoding("ANSI_X3.4-1968"));
+ if(dot_pos == std::string::npos) {
+ return "ANSI_X3.4-1968"; // default is ASCII
+ }
- assert(std::text_encoding::environment_is<std::text_encoding::id::ASCII>());
+ if(at_pos == std::string::npos) {
+ return locale_str.substr(dot_pos + 1);
}
+
+ return locale_str.substr(dot_pos + 1, at_pos - 1 - dot_pos);
+}
- { // 2
- std::setlocale(LC_ALL, "en_US.UTF-8");
+int main() {
+ auto default_locale = std::setlocale(LC_ALL, nullptr);
+ auto default_encoding = extractEncodingFromLocale(std::string(default_locale));
+ auto default_te = std::text_encoding(default_encoding);
+
+ { // 1
+ auto env_te = std::text_encoding::environment();
+ assert(env_te == std::text_encoding::environment());
+ assert(env_te == default_te);
+ assert(env_te.mib() == default_te.mib());
+ }
- auto te = std::text_encoding::environment();
+ { // 2
+ std::setlocale(LC_ALL, LOCALE_en_US_UTF_8);
- assert(te == std::text_encoding::environment());
- assert(te.mib() == std::text_encoding::id::ASCII);
- assert(std::string_view(te.name()) == "ANSI_X3.4-1968");
- assert(te == std::text_encoding("ANSI_X3.4-1968"));
+ auto env_te = std::text_encoding::environment();
- assert(std::text_encoding::environment_is<std::text_encoding::id::ASCII>());
+ assert(env_te == std::text_encoding::environment());
+ assert(env_te == default_te);
+ assert(env_te.mib() == default_te.mib());
}
{ // 3
More information about the libcxx-commits mailing list