[libcxx-commits] [clang] [libcxx] [llvm] [Clang] Add warnings when mixing different charN_t types (PR #138708)
Tom Honermann via libcxx-commits
libcxx-commits at lists.llvm.org
Thu May 8 13:37:02 PDT 2025
================
@@ -0,0 +1,155 @@
+// RUN: %clang_cc1 -verify -fsyntax-only -std=c++20 -Wconversion %s
+
+void c8(char8_t);
+void c16(char16_t);
+void c32(char32_t);
+
+void test(char8_t u8, char16_t u16, char32_t u32) {
+ c8(u8);
+ c8(u16); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' may lose precision and change the meaning of the represented code unit}}
+ c8(u32); // expected-warning {{implicit conversion from 'char32_t' to 'char8_t' may lose precision and change the meaning of the represented code unit}}
+
+ c16(u8); // expected-warning {{implicit conversion from 'char8_t' to 'char16_t' may change the meaning of the represented code unit}}
+ c16(u16);
+ c16(u32); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' may lose precision and change the meaning of the represented code unit}}
+
+ c32(u8); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' may change the meaning of the represented code unit}}
+ c32(u16); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' may change the meaning of the represented code unit}}
+ c32(u32);
+
+
+ c8(char32_t(0x7f));
+ c8(char32_t(0x80)); // expected-warning {{implicit conversion from 'char32_t' to 'char8_t' changes the meaning of the codepoint '<U+0080>'}}
+
+ c8(char16_t(0x7f));
+ c8(char16_t(0x80)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the codepoint '<U+0080>'}}
+ c8(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code unit '<0xD800>'}}
+ c8(char16_t(0xE000)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the codepoint '<U+E000>'}}
+
+
+ c16(char32_t(0x7f));
+ c16(char32_t(0x80));
+ c16(char32_t(0xD7FF));
+ c16(char32_t(0xD800)); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code unit '<0xD800>'}}
+ c16(char32_t(0xE000));
+ c16(char32_t(U'🐉')); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the codepoint '🐉'}}
+
+
+ c32(char8_t(0x7f));
+ c32(char8_t(0x80)); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' changes the meaning of the code unit '<0x80>'}}
+ c32(char8_t(0xFF)); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' changes the meaning of the code unit '<0xFF>'}}
+
+
+ c32(char16_t(0x7f));
+ c32(char16_t(0x80));
+
+ c32(char16_t(0xD7FF));
+ c32(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xD800>'}}
+ c32(char16_t(0xDFFF)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xDFFF>'}}
+ c32(char16_t(0xE000));
+ c32(char16_t(u'☕'));
+
+ (void)static_cast<char32_t>(char8_t(0x80)); // sanity check: no explicit conversion;
+
+ using Char8 = char8_t;
+ Char8 c81 = u16; // expected-warning {{implicit conversion from 'char16_t' to 'Char8' (aka 'char8_t') may lose precision and change the meaning of the represented code unit}}
+
+ [[maybe_unused]] char c = u16; // expected-warning {{implicit conversion loses integer precision: 'char16_t' to 'char'}}
+
+ // FIXME: We should apply the same logic to wchar
+ [[maybe_unused]] wchar_t wc = u16;
+ [[maybe_unused]] wchar_t wc2 = u8;
+}
+
+void test_comp(char8_t u8, char16_t u16, char32_t u32) {
+ (void)(u8 == u8' ');
+ (void)(u8 == u' '); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' may compare different codepoints}}
+ (void)(u8 == U' '); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' may compare different codepoints}}
----------------
tahonermann wrote:
I think these warnings should be suppressed since the RHS may be constant evaluated and designates a character that is representable with a single code unit in the type of the LHS.
https://github.com/llvm/llvm-project/pull/138708
More information about the libcxx-commits
mailing list