[libcxx-commits] [libcxx] Implement P1885R12: `<text_encoding>` header (PR #141312)
Nikolas Klauser via libcxx-commits
libcxx-commits at lists.llvm.org
Sat May 24 06:05:37 PDT 2025
================
@@ -0,0 +1,1482 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_H
+#define _LIBCPP___TEXT_ENCODING_TEXT_ENCODING_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+#if _LIBCPP_HAS_LOCALIZATION
+
+# include <__algorithm/copy_n.h>
+# include <__algorithm/lower_bound.h>
+# include <__algorithm/min.h>
+# include <__functional/hash.h>
+# include <__iterator/iterator_traits.h>
+# include <__locale_dir/locale_base_api.h>
+# include <__ranges/view_interface.h>
+# include <__string/char_traits.h>
+# include <__utility/unreachable.h>
+# include <cstdint>
+# include <string_view>
+
+_LIBCPP_PUSH_MACROS
+# include <__undef_macros>
+
+# if _LIBCPP_STD_VER >= 26
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+struct _LIBCPP_EXPORTED_FROM_ABI text_encoding {
+ static constexpr size_t max_name_length = 63;
+
+private:
+ struct __encoding_data {
+ using __id_rep _LIBCPP_NODEBUG = int_least32_t;
+ __id_rep __mib_rep;
+ const char* __name;
+
+ friend constexpr bool operator==(const __encoding_data& __e, const __encoding_data& __other) _NOEXCEPT {
+ return __e.__mib_rep == __other.__mib_rep || __comp_name(__e.__name, __other.__name);
+ }
+
+ friend constexpr bool operator<(const __encoding_data& __e, const __id_rep __i) _NOEXCEPT {
+ return __e.__mib_rep < __i;
+ }
+ };
+
+public:
+ enum class id : __encoding_data::__id_rep {
+ other = 1,
+ unknown = 2,
+ ASCII = 3,
+ ISOLatin1 = 4,
+ ISOLatin2 = 5,
+ ISOLatin3 = 6,
+ ISOLatin4 = 7,
+ ISOLatinCyrillic = 8,
+ ISOLatinArabic = 9,
+ ISOLatinGreek = 10,
+ ISOLatinHebrew = 11,
+ ISOLatin5 = 12,
+ ISOLatin6 = 13,
+ ISOTextComm = 14,
+ HalfWidthKatakana = 15,
+ JISEncoding = 16,
+ ShiftJIS = 17,
+ EUCPkdFmtJapanese = 18,
+ EUCFixWidJapanese = 19,
+ ISO4UnitedKingdom = 20,
+ ISO11SwedishForNames = 21,
+ ISO15Italian = 22,
+ ISO17Spanish = 23,
+ ISO21German = 24,
+ ISO60DanishNorwegian = 25,
+ ISO69French = 26,
+ ISO10646UTF1 = 27,
+ ISO646basic1983 = 28,
+ INVARIANT = 29,
+ ISO2IntlRefVersion = 30,
+ NATSSEFI = 31,
+ NATSSEFIADD = 32,
+ NATSDANO = 33,
+ NATSDANOADD = 34,
+ ISO10Swedish = 35,
+ KSC56011987 = 36,
+ ISO2022KR = 37,
+ EUCKR = 38,
+ ISO2022JP = 39,
+ ISO2022JP2 = 40,
+ ISO13JISC6220jp = 41,
+ ISO14JISC6220ro = 42,
+ ISO16Portuguese = 43,
+ ISO18Greek7Old = 44,
+ ISO19LatinGreek = 45,
+ ISO25French = 46,
+ ISO27LatinGreek1 = 47,
+ ISO5427Cyrillic = 48,
+ ISO42JISC62261978 = 49,
+ ISO47BSViewdata = 50,
+ ISO49INIS = 51,
+ ISO50INIS8 = 52,
+ ISO51INISCyrillic = 53,
+ ISO54271981 = 54,
+ ISO5428Greek = 55,
+ ISO57GB1988 = 56,
+ ISO58GB231280 = 57,
+ ISO61Norwegian2 = 58,
+ ISO70VideotexSupp1 = 59,
+ ISO84Portuguese2 = 60,
+ ISO85Spanish2 = 61,
+ ISO86Hungarian = 62,
+ ISO87JISX0208 = 63,
+ ISO88Greek7 = 64,
+ ISO89ASMO449 = 65,
+ ISO90 = 66,
+ ISO91JISC62291984a = 67,
+ ISO92JISC62991984b = 68,
+ ISO93JIS62291984badd = 69,
+ ISO94JIS62291984hand = 70,
+ ISO95JIS62291984handadd = 71,
+ ISO96JISC62291984kana = 72,
+ ISO2033 = 73,
+ ISO99NAPLPS = 74,
+ ISO102T617bit = 75,
+ ISO103T618bit = 76,
+ ISO111ECMACyrillic = 77,
+ ISO121Canadian1 = 78,
+ ISO122Canadian2 = 79,
+ ISO123CSAZ24341985gr = 80,
+ ISO88596E = 81,
+ ISO88596I = 82,
+ ISO128T101G2 = 83,
+ ISO88598E = 84,
+ ISO88598I = 85,
+ ISO139CSN369103 = 86,
+ ISO141JUSIB1002 = 87,
+ ISO143IECP271 = 88,
+ ISO146Serbian = 89,
+ ISO147Macedonian = 90,
+ ISO150 = 91,
+ ISO151Cuba = 92,
+ ISO6937Add = 93,
+ ISO153GOST1976874 = 94,
+ ISO8859Supp = 95,
+ ISO10367Box = 96,
+ ISO158Lap = 97,
+ ISO159JISX02121990 = 98,
+ ISO646Danish = 99,
+ USDK = 100,
+ DKUS = 101,
+ KSC5636 = 102,
+ Unicode11UTF7 = 103,
+ ISO2022CN = 104,
+ ISO2022CNEXT = 105,
+ UTF8 = 106,
+ ISO885913 = 109,
+ ISO885914 = 110,
+ ISO885915 = 111,
+ ISO885916 = 112,
+ GBK = 113,
+ GB18030 = 114,
+ OSDEBCDICDF0415 = 115,
+ OSDEBCDICDF03IRV = 116,
+ OSDEBCDICDF041 = 117,
+ ISO115481 = 118,
+ KZ1048 = 119,
+ UCS2 = 1000,
+ UCS4 = 1001,
+ UnicodeASCII = 1002,
+ UnicodeLatin1 = 1003,
+ UnicodeJapanese = 1004,
+ UnicodeIBM1261 = 1005,
+ UnicodeIBM1268 = 1006,
+ UnicodeIBM1276 = 1007,
+ UnicodeIBM1264 = 1008,
+ UnicodeIBM1265 = 1009,
+ Unicode11 = 1010,
+ SCSU = 1011,
+ UTF7 = 1012,
+ UTF16BE = 1013,
+ UTF16LE = 1014,
+ UTF16 = 1015,
+ CESU8 = 1016,
+ UTF32 = 1017,
+ UTF32BE = 1018,
+ UTF32LE = 1019,
+ BOCU1 = 1020,
+ UTF7IMAP = 1021,
+ Windows30Latin1 = 2000,
+ Windows31Latin1 = 2001,
+ Windows31Latin2 = 2002,
+ Windows31Latin5 = 2003,
+ HPRoman8 = 2004,
+ AdobeStandardEncoding = 2005,
+ VenturaUS = 2006,
+ VenturaInternational = 2007,
+ DECMCS = 2008,
+ PC850Multilingual = 2009,
+ PC8DanishNorwegian = 2012,
+ PC862LatinHebrew = 2013,
+ PC8Turkish = 2014,
+ IBMSymbols = 2015,
+ IBMThai = 2016,
+ HPLegal = 2017,
+ HPPiFont = 2018,
+ HPMath8 = 2019,
+ HPPSMath = 2020,
+ HPDesktop = 2021,
+ VenturaMath = 2022,
+ MicrosoftPublishing = 2023,
+ Windows31J = 2024,
+ GB2312 = 2025,
+ Big5 = 2026,
+ Macintosh = 2027,
+ IBM037 = 2028,
+ IBM038 = 2029,
+ IBM273 = 2030,
+ IBM274 = 2031,
+ IBM275 = 2032,
+ IBM277 = 2033,
+ IBM278 = 2034,
+ IBM280 = 2035,
+ IBM281 = 2036,
+ IBM284 = 2037,
+ IBM285 = 2038,
+ IBM290 = 2039,
+ IBM297 = 2040,
+ IBM420 = 2041,
+ IBM423 = 2042,
+ IBM424 = 2043,
+ PC8CodePage437 = 2011,
+ IBM500 = 2044,
+ IBM851 = 2045,
+ PCp852 = 2010,
+ IBM855 = 2046,
+ IBM857 = 2047,
+ IBM860 = 2048,
+ IBM861 = 2049,
+ IBM863 = 2050,
+ IBM864 = 2051,
+ IBM865 = 2052,
+ IBM868 = 2053,
+ IBM869 = 2054,
+ IBM870 = 2055,
+ IBM871 = 2056,
+ IBM880 = 2057,
+ IBM891 = 2058,
+ IBM903 = 2059,
+ IBBM904 = 2060,
+ IBM905 = 2061,
+ IBM918 = 2062,
+ IBM1026 = 2063,
+ IBMEBCDICATDE = 2064,
+ EBCDICATDEA = 2065,
+ EBCDICCAFR = 2066,
+ EBCDICDKNO = 2067,
+ EBCDICDKNOA = 2068,
+ EBCDICFISE = 2069,
+ EBCDICFISEA = 2070,
+ EBCDICFR = 2071,
+ EBCDICIT = 2072,
+ EBCDICPT = 2073,
+ EBCDICES = 2074,
+ EBCDICESA = 2075,
+ EBCDICESS = 2076,
+ EBCDICUK = 2077,
+ EBCDICUS = 2078,
+ Unknown8BiT = 2079,
+ Mnemonic = 2080,
+ Mnem = 2081,
+ VISCII = 2082,
+ VIQR = 2083,
+ KOI8R = 2084,
+ HZGB2312 = 2085,
+ IBM866 = 2086,
+ PC775Baltic = 2087,
+ KOI8U = 2088,
+ IBM00858 = 2089,
+ IBM00924 = 2090,
+ IBM01140 = 2091,
+ IBM01141 = 2092,
+ IBM01142 = 2093,
+ IBM01143 = 2094,
+ IBM01144 = 2095,
+ IBM01145 = 2096,
+ IBM01146 = 2097,
+ IBM01147 = 2098,
+ IBM01148 = 2099,
+ IBM01149 = 2100,
+ Big5HKSCS = 2101,
+ IBM1047 = 2102,
+ PTCP154 = 2103,
+ Amiga1251 = 2104,
+ KOI7switched = 2105,
+ BRF = 2106,
+ TSCII = 2107,
+ CP51932 = 2108,
+ windows874 = 2109,
+ windows1250 = 2250,
+ windows1251 = 2251,
+ windows1252 = 2252,
+ windows1253 = 2253,
+ windows1254 = 2254,
+ windows1255 = 2255,
+ windows1256 = 2256,
+ windows1257 = 2257,
+ windows1258 = 2258,
+ TIS620 = 2259,
+ CP50220 = 2260,
+ reserved = 3000
+ };
+
+ using enum id;
+
+ _LIBCPP_HIDE_FROM_ABI constexpr text_encoding() = default;
+ _LIBCPP_HIDE_FROM_ABI constexpr explicit text_encoding(string_view __enc) _NOEXCEPT
+ : __encoding_rep_(__find_encoding_data(__enc)) {
+ __enc.copy(__name_, max_name_length, 0);
+ }
+ _LIBCPP_HIDE_FROM_ABI constexpr text_encoding(id __i) _NOEXCEPT : __encoding_rep_(__find_encoding_data_by_id(__i)) {
+ if (__encoding_rep_->__name[0] != '\0')
+ std::copy_n(__encoding_rep_->__name, std::char_traits<char>::length(__encoding_rep_->__name), __name_);
----------------
philnik777 wrote:
I'd rather save the size and avoid this call entirely. I'm pretty sure we can get away with not even increasing the size of the struct, since it's at most 63.
https://github.com/llvm/llvm-project/pull/141312
More information about the libcxx-commits
mailing list