[clang] [Clang] Do not warn on UTF-16 -> UTF-32 conversions. (PR #163927)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 17 01:52:12 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Corentin Jabot (cor3ntin)
<details>
<summary>Changes</summary>
UTF-16 to UTF-16 conversions seems widespread,
and lone surrogate have a distinct representation in UTF-32.
Lets not warn on this case to make the warning easier to adopt. This follows SG-16 guideline
https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2025/p3695r2.html#changes-since-r1
Fixes #<!-- -->163719
---
Full diff: https://github.com/llvm/llvm-project/pull/163927.diff
2 Files Affected:
- (modified) clang/lib/Sema/SemaChecking.cpp (+8-1)
- (modified) clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp (+4-4)
``````````diff
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 4f409ca0f414d..0300d09be0420 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -12309,13 +12309,20 @@ static void DiagnoseMixedUnicodeImplicitConversion(Sema &S, const Type *Source,
SourceLocation CC) {
assert(Source->isUnicodeCharacterType() && Target->isUnicodeCharacterType() &&
Source != Target);
+
+ // Lone surrogates have a distinct representation in UTF-32.
+ // Converting betweem UTF-16 and UTF-32 codepoint seems very widespread,
+ // so don't warn on such conversion.
+ if(Source->isChar16Type() && Target->isChar32Type())
+ return;
+
Expr::EvalResult Result;
if (E->EvaluateAsInt(Result, S.getASTContext(), Expr::SE_AllowSideEffects,
S.isConstantEvaluatedContext())) {
llvm::APSInt Value(32);
Value = Result.Val.getInt();
bool IsASCII = Value <= 0x7F;
- bool IsBMP = Value <= 0xD7FF || (Value >= 0xE000 && Value <= 0xFFFF);
+ bool IsBMP = Value <= 0xDFFF || (Value >= 0xE000 && Value <= 0xFFFF);
bool ConversionPreservesSemantics =
IsASCII || (!Source->isChar8Type() && !Target->isChar8Type() && IsBMP);
diff --git a/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp b/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp
index fcff006d0e028..f17f20ca25295 100644
--- a/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp
+++ b/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp
@@ -14,7 +14,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
c16(u32); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' may lose precision and change the meaning of the represented code unit}}
c32(u8); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' may change the meaning of the represented code unit}}
- c32(u16); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' may change the meaning of the represented code unit}}
+ c32(u16);
c32(u32);
@@ -30,7 +30,7 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
c16(char32_t(0x7f));
c16(char32_t(0x80));
c16(char32_t(0xD7FF));
- c16(char32_t(0xD800)); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code unit '<0xD800>'}}
+ c16(char32_t(0xD800));
c16(char32_t(0xE000));
c16(char32_t(U'🐉')); // expected-warning {{implicit conversion from 'char32_t' to 'char16_t' changes the meaning of the code point '🐉'}}
@@ -44,8 +44,8 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
c32(char16_t(0x80));
c32(char16_t(0xD7FF));
- c32(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xD800>'}}
- c32(char16_t(0xDFFF)); // expected-warning {{implicit conversion from 'char16_t' to 'char32_t' changes the meaning of the code unit '<0xDFFF>'}}
+ c32(char16_t(0xD800));
+ c32(char16_t(0xDFFF));
c32(char16_t(0xE000));
c32(char16_t(u'☕'));
``````````
</details>
https://github.com/llvm/llvm-project/pull/163927
More information about the cfe-commits
mailing list