[clang] [llvm] [Clang] Reduce the number of ways we have to print a codepoint. (PR #206990)

Wed Jul 1 08:10:32 PDT 2026

llvmorg-github-actions[bot] wrote:




@llvm/pr-subscribers-llvm-support

Author: Corentin Jabot (cor3ntin)

<details>
<summary>Changes</summary>

In #206326, we introduced yet another way to format a codepoint, so we try to claw back some consistency.

- When we display a single character, we do so inside quotes
- Non-decodable code units are displayed as <0xYY>
- In strings
    - we render printable and format characters
    - we print <U+XXXX> for non-printable, non-format characters

- In characters
    - we only render printable, non-format characters
    - we always print the codepoint as U+XXXX for non-ASCII characters

This does not change `WriteCharValueForDiagnostic`, which is deeply weird and was contentious a while back.

So we still have two different formatting utilities, which is better than four!

I've also added a test for invalid UTF-8 in `\N{}`.

---

Patch is 23.46 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/206990.diff


13 Files Affected:

- (modified) clang/include/clang/Basic/Diagnostic.h (+2-1) 
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (+2-2) 
- (modified) clang/lib/AST/ASTDiagnostic.cpp (+2-6) 
- (modified) clang/lib/Basic/Diagnostic.cpp (+51-27) 
- (modified) clang/lib/Lex/LiteralSupport.cpp (+3-9) 
- (added) clang/test/Lexer/char-escapes-delimited-invalid-utf8.c (+5) 
- (modified) clang/test/Lexer/char-escapes-delimited.c (+1-1) 
- (modified) clang/test/SemaCXX/static-assert-cxx26.cpp (+1-1) 
- (modified) clang/test/SemaCXX/static-assert.cpp (+4-4) 
- (modified) clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp (+14-14) 
- (modified) clang/unittests/Basic/DiagnosticTest.cpp (+12-12) 
- (modified) llvm/include/llvm/Support/ConvertUTF.h (+4) 
- (modified) llvm/lib/Support/ConvertUTF.cpp (+2-3) 


``````````diff

diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h
index c033320687078..66e79e3b4300b 100644
--- a/clang/include/clang/Basic/Diagnostic.h
+++ b/clang/include/clang/Basic/Diagnostic.h
@@ -1877,7 +1877,8 @@ void ProcessWarningOptions(DiagnosticsEngine &Diags,
                            const DiagnosticOptions &Opts,
                            llvm::vfs::FileSystem &VFS, bool ReportDiags = true);
 void EscapeStringForDiagnostic(StringRef Str, SmallVectorImpl<char> &OutStr);
-llvm::SmallString<16> DisplayCodePointForDiagnostic(llvm::UTF32 CodePoint);
+SmallString<16> EscapeSingleCodepointForDiagnostic(StringRef Str);
+SmallString<16> EscapeSingleCodepointForDiagnostic(llvm::UTF32 CP);
 } // namespace clang
 
 #endif // LLVM_CLANG_BASIC_DIAGNOSTIC_H
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 86b765fdf1fab..05f49a4dbd1ca 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -4664,7 +4664,7 @@ def warn_impcast_unicode_precision
       InGroup<CharacterConversion>;
 def warn_impcast_unicode_char_type_constant
     : Warning<"implicit conversion from %0 to %1 changes the meaning of the "
-              "%select{code unit|code point}2 '%3'">,
+              "%select{code unit|code point}2 %3">,
       InGroup<CharacterConversion>;
 
 def warn_comparison_unicode_mixed_types
@@ -4674,7 +4674,7 @@ def warn_comparison_unicode_mixed_types
 
 def warn_comparison_unicode_mixed_types_constant
     : Warning<"comparing values of different Unicode code unit types %0 and %1 "
-              "compares unrelated code units '%2' and '%3'">,
+              "compares unrelated code units %2 and %3">,
       InGroup<CharacterConversion>;
 
 def warn_xor_used_as_pow : Warning<
diff --git a/clang/lib/AST/ASTDiagnostic.cpp b/clang/lib/AST/ASTDiagnostic.cpp
index b8023cb6fa10f..f7888f58985db 100644
--- a/clang/lib/AST/ASTDiagnostic.cpp
+++ b/clang/lib/AST/ASTDiagnostic.cpp
@@ -2212,10 +2212,6 @@ std::string clang::FormatUTFCodeUnitAsCodepoint(unsigned Value, QualType T) {
     return std::string(Str.begin(), Str.end());
   }
 
-  char Buffer[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
-  char *Ptr = Buffer;
-  [[maybe_unused]] bool Converted = llvm::ConvertCodePointToUTF8(Value, Ptr);
-  assert(Converted && "trying to encode invalid code unit");
-  EscapeStringForDiagnostic(StringRef(Buffer, Ptr - Buffer), Str);
-  return std::string(Str.begin(), Str.end());
+  llvm::SmallString<16> Escaped = EscapeSingleCodepointForDiagnostic(Value);
+  return std::string(Escaped.begin(), Escaped.end());
 }
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 57fa8a16d3a50..4c63e54964ed6 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SpecialCaseList.h"
 #include "llvm/Support/Unicode.h"
@@ -1042,19 +1043,33 @@ void Diagnostic::FormatDiagnostic(SmallVectorImpl<char> &OutStr) const {
 
 /// EscapeStringForDiagnostic - Append Str to the diagnostic buffer,
 /// escaping non-printable characters and ill-formed code unit sequences.
-void clang::EscapeStringForDiagnostic(StringRef Str,
-                                      SmallVectorImpl<char> &OutStr) {
+static void EscapeStringForDiagnostic(StringRef Str,
+                                      SmallVectorImpl<char> &OutStr,
+                                      bool ForCodepoint) {
   OutStr.reserve(OutStr.size() + Str.size());
   auto *Begin = reinterpret_cast<const unsigned char *>(Str.data());
   llvm::raw_svector_ostream OutStream(OutStr);
-  const unsigned char *End = Begin + Str.size();
+  unsigned Size = Str.size();
+  const unsigned char *End = Begin + Size;
+  if (ForCodepoint) {
+    unsigned Size = llvm::getUTF8SequenceSize(Begin, End);
+    if (Size == 0)
+      Size = llvm::findMaximalSubpartOfIllFormedUTF8Sequence(Begin, End);
+    End = Begin + Size;
+  }
   while (Begin != End) {
-    // ASCII case
-    if (isPrintable(*Begin) || isWhitespace(*Begin)) {
+    if (!ForCodepoint && (isPrintable(*Begin) || isWhitespace(*Begin))) {
       OutStream << *Begin;
       ++Begin;
       continue;
     }
+    if (ForCodepoint && *Begin < 0x80) {
+      if (isPrintable(*Begin)) {
+        OutStream << "'" << *Begin << "'";
+        ++Begin;
+        continue;
+      }
+    }
     if (llvm::isLegalUTF8Sequence(Begin, End)) {
       llvm::UTF32 CodepointValue;
       llvm::UTF32 *CpPtr = &CodepointValue;
@@ -1069,40 +1084,49 @@ void clang::EscapeStringForDiagnostic(StringRef Str,
           "the sequence is legal UTF-8 but we couldn't convert it to UTF-32");
       assert(Begin == CodepointEnd &&
              "we must be further along in the string now");
+
       if (llvm::sys::unicode::isPrintable(CodepointValue) ||
-          llvm::sys::unicode::isFormatting(CodepointValue)) {
-        OutStr.append(CodepointBegin, CodepointEnd);
-        continue;
+          (!ForCodepoint && llvm::sys::unicode::isFormatting(CodepointValue))) {
+        OutStream << (ForCodepoint ? "'" : "")
+                  << StringRef(reinterpret_cast<const char *>(CodepointBegin),
+                               std::distance(CodepointBegin, CodepointEnd))
+                  << (ForCodepoint ? "' " : "");
+        if (!ForCodepoint)
+          continue;
       }
       // Unprintable code point.
-      OutStream << "<U+" << llvm::format_hex_no_prefix(CodepointValue, 4, true)
-                << ">";
+      OutStream << (ForCodepoint ? "" : "<") << "U+"
+                << llvm::format_hex_no_prefix(CodepointValue, 4, true)
+                << (ForCodepoint ? "" : ">");
       continue;
     }
     // Invalid code unit.
-    OutStream << "<" << llvm::format_hex_no_prefix(*Begin, 2, true) << ">";
+    OutStream << "<0x" << llvm::format_hex_no_prefix(*Begin, 2, true) << ">";
     ++Begin;
   }
 }
 
+/// EscapeStringForDiagnostic - Append Str to the diagnostic buffer,
+/// escaping non-printable characters and ill-formed code unit sequences.
+void clang::EscapeStringForDiagnostic(StringRef Str,
+                                      SmallVectorImpl<char> &OutStr) {
+  ::EscapeStringForDiagnostic(Str, OutStr, /*ForCodepoint=*/false);
+}
+
 /// Displays a single Unicode codepoint in U+NNNN notation, optionally
 /// prepending the quoted codepoint itself if printable.
-llvm::SmallString<16>
-clang::DisplayCodePointForDiagnostic(llvm::UTF32 CodePoint) {
-  llvm::SmallString<16> Result;
-  if (llvm::sys::unicode::isPrintable(CodePoint)) {
-    std::string CharUTF8;
-    llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&CodePoint, 1),
-                                   CharUTF8);
-    Result.append("'");
-    Result.append(CharUTF8);
-    Result.append("' U+");
-  } else {
-    Result.append("U+");
-  }
-  llvm::raw_svector_ostream OS(Result);
-  llvm::write_hex(OS, CodePoint, llvm::HexPrintStyle::Upper, 4);
-  return Result;
+SmallString<16> clang::EscapeSingleCodepointForDiagnostic(StringRef Str) {
+  SmallString<16> CP;
+  ::EscapeStringForDiagnostic(Str, CP, /*ForCodepoint=*/true);
+  return CP;
+}
+
+SmallString<16> clang::EscapeSingleCodepointForDiagnostic(llvm::UTF32 CP) {
+  std::string Str;
+  bool Converted = convertUTF32ToUTF8String(ArrayRef<llvm::UTF32>(&CP, 1), Str);
+  if (!Converted)
+    return SmallString<16>(llvm::formatv("<{0:X+}>", CP).str());
+  return EscapeSingleCodepointForDiagnostic(Str);
 }
 
 void Diagnostic::FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 72a65dd156b19..54db19859d32f 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -561,17 +561,12 @@ static void DiagnoseInvalidUnicodeCharacterName(
       ++P;
       continue;
     }
-    const auto *Src = reinterpret_cast<const llvm::UTF8 *>(P);
-    const auto *SrcEnd = reinterpret_cast<const llvm::UTF8 *>(E);
-    llvm::UTF32 CodePoint = 0;
-    if (llvm::convertUTF8Sequence(&Src, SrcEnd, &CodePoint,
-                                  llvm::strictConversion) != llvm::conversionOK)
-      break;
     SourceLocation CharLoc = Lexer::AdvanceToTokenCharacter(
         Loc, (TokRangeBegin - TokBegin) + (P - Name.begin()), Loc.getManager(),
         Features);
     Diags->Report(CharLoc, diag::note_invalid_ucn_name_character)
-        << DisplayCodePointForDiagnostic(CodePoint);
+        << EscapeSingleCodepointForDiagnostic(
+               StringRef(P, std::distance(P, E)));
     HasIllegalCharacter = true;
     break;
   }
@@ -607,10 +602,9 @@ static void DiagnoseInvalidUnicodeCharacterName(
         3)
       break;
     Distance = Match.Distance;
-
     Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
          diag::note_invalid_ucn_name_candidate)
-        << Match.Name << DisplayCodePointForDiagnostic(Match.Value)
+        << Match.Name << EscapeSingleCodepointForDiagnostic(Match.Value)
         << FixItHint::CreateReplacement(
                MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
                                    TokRangeEnd),
diff --git a/clang/test/Lexer/char-escapes-delimited-invalid-utf8.c b/clang/test/Lexer/char-escapes-delimited-invalid-utf8.c
new file mode 100644
index 0000000000000..301302e1c786a
--- /dev/null
+++ b/clang/test/Lexer/char-escapes-delimited-invalid-utf8.c
@@ -0,0 +1,5 @@
+// RUN: %clang_cc1 -x c++ -std=c++23
+
+unsigned h = U'\N{INVALID�}'; \
+//expected-error {{'INVALID<0x80>' is not a valid Unicode character name}}
+//expected-note  {{character <0x80> cannot appear in a Unicode character name}}
diff --git a/clang/test/Lexer/char-escapes-delimited.c b/clang/test/Lexer/char-escapes-delimited.c
index 1c20456701d75..d581c9760a364 100644
--- a/clang/test/Lexer/char-escapes-delimited.c
+++ b/clang/test/Lexer/char-escapes-delimited.c
@@ -83,7 +83,7 @@ void named(void) {
   char b  = '\N{DOLLAR SIGN}'; // ext-warning {{extension}} cxx23-warning {{C++23}}
   char b_ = '\N{ DOL-LAR _SIGN }'; // expected-error {{' DOL-LAR _SIGN ' is not a valid Unicode character name}} \
                                // expected-note {{character names in Unicode escape sequences are sensitive to case and whitespaces}} \
-                               // expected-note {{character '_' U+005F cannot appear in a Unicode character name}}
+                               // expected-note {{character '_' cannot appear in a Unicode character name}}
 
   char c = '\N{NOTATHING}'; // expected-error {{'NOTATHING' is not a valid Unicode character name}} \
                             // expected-note 5{{did you mean}}
diff --git a/clang/test/SemaCXX/static-assert-cxx26.cpp b/clang/test/SemaCXX/static-assert-cxx26.cpp
index e9eb98dc72e04..644805a9e84c7 100644
--- a/clang/test/SemaCXX/static-assert-cxx26.cpp
+++ b/clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -309,7 +309,7 @@ namespace EscapeInDiagnostic {
 static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
                                        // expected-note {{evaluates to ''\t' (0x09, 9) == '<U+0001>' (0x01, 1)'}}
 static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
-                                                   // expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}}
+                                                   // expected-note {{evaluates to 'u8'<0x80>' (0x80, 128) == u8'<0x85>' (0x85, 133)'}}
 static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
                                                          // expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}}
 }
diff --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp
index 354016db36432..c48791203733b 100644
--- a/clang/test/SemaCXX/static-assert.cpp
+++ b/clang/test/SemaCXX/static-assert.cpp
@@ -278,13 +278,13 @@ namespace Diagnostics {
   // The note above is intended to match "evaluates to '\n' (0x0A, 10) == '<U+0000>' (0x00, 0)'", but if we write it as it is,
   // the "\n" cannot be consumed by the diagnostic consumer.
   static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
-                                                    // expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
+                                                    // expected-note {{evaluates to '10 == '<0x85>' (0x85, -123)'}}
   static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
-                                                    // expected-note {{evaluates to ''<FC>' (0xFC, -4) == 248'}}
+                                                    // expected-note {{evaluates to ''<0xFC>' (0xFC, -4) == 248'}}
   static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
-                                               // expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
+                                               // expected-note {{evaluates to ''<0x80>' (0x80, -128) == '<0x85>' (0x85, -123)'}}
   static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
-                                             // expected-note {{evaluates to ''<A0>' (0xA0, -96) == ' ' (0x20, 32)'}}
+                                             // expected-note {{evaluates to ''<0xA0>' (0xA0, -96) == ' ' (0x20, 32)'}}
   static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
                                                   // expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}}
   static_assert(L"＼／"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
diff --git a/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp b/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp
index f17f20ca25295..6f9f8f3898625 100644
--- a/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp
+++ b/clang/test/SemaCXX/warn-implicit-unicode-conversions.cpp
@@ -19,12 +19,12 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
 
 
     c8(char32_t(0x7f));
-    c8(char32_t(0x80));   // expected-warning {{implicit conversion from 'char32_t' to 'char8_t' changes the meaning of the code point '<U+0080>'}}
+    c8(char32_t(0x80));   // expected-warning {{implicit conversion from 'char32_t' to 'char8_t' changes the meaning of the code point U+0080}}
 
     c8(char16_t(0x7f));
-    c8(char16_t(0x80));   // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code point '<U+0080>'}}
-    c8(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code unit '<0xD800>'}}
-    c8(char16_t(0xE000)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code point '<U+E000>'}}
+    c8(char16_t(0x80));   // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code point U+0080}}
+    c8(char16_t(0xD800)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code unit <0xD800>}}
+    c8(char16_t(0xE000)); // expected-warning {{implicit conversion from 'char16_t' to 'char8_t' changes the meaning of the code point U+E000}}
 
 
     c16(char32_t(0x7f));
@@ -36,8 +36,8 @@ void test(char8_t u8, char16_t u16, char32_t u32) {
 
 
     c32(char8_t(0x7f));
-    c32(char8_t(0x80)); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' changes the meaning of the code unit '<0x80>'}}
-    c32(char8_t(0xFF)); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' changes the meaning of the code unit '<0xFF>'}}
+    c32(char8_t(0x80)); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' changes the meaning of the code unit <0x80>}}
+    c32(char8_t(0xFF)); // expected-warning {{implicit conversion from 'char8_t' to 'char32_t' changes the meaning of the code unit <0xFF>}}
 
 
     c32(char16_t(0x7f));
@@ -90,19 +90,19 @@ void test_comp(char8_t u8, char16_t u16, char32_t u32) {
     (void)(char8_t(0x7f) == char32_t(0x7f));
 
     (void)(char8_t(0x80) == char8_t(0x80));
-    (void)(char8_t(0x80) == char16_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' compares unrelated code units '<0x80>' and '<U+0080>}}
-    (void)(char8_t(0x80) == char32_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' compares unrelated code units '<0x80>' and '<U+0080>}}
+    (void)(char8_t(0x80) == char16_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' compares unrelated code units <0x80> and U+0080}}
+    (void)(char8_t(0x80) == char32_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' compares unrelated code units <0x80> and U+0080}}
 
     (void)(char8_t(0x80) == char8_t(0x7f));
-    (void)(char8_t(0x80) == char16_t(0x7f)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' compares unrelated code units '<0x80>' and '<U+007F>'}}
-    (void)(char8_t(0x80) == char32_t(0x7f)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' compares unrelated code units '<0x80>' and '<U+007F>'}}
+    (void)(char8_t(0x80) == char16_t(0x7f)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char16_t' compares unrelated code units <0x80> and U+007F}}
+    (void)(char8_t(0x80) == char32_t(0x7f)); // expected-warning{{comparing values of different Unicode code unit types 'char8_t' and 'char32_t' compares unrelated code units <0x80> and U+007F}}
 
 
     (void)(char16_t(0x7f) < char8_t(0x7f));
     (void)(char16_t(0x7f) < char16_t(0x7f));
     (void)(char16_t(0x7f) < char32_t(0x7f));
 
-    (void)(char16_t(0x80) < char8_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char8_t' compares unrelated code units '<U+0080>' and '<0x80>'}}
+    (void)(char16_t(0x80) < char8_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char8_t' compares unrelated code units U+0080 and <0x80>}}
     (void)(char16_t(0x80) < char16_t(0x80));
     (void)(char16_t(0x80) < char32_t(0x80));
 
@@ -115,7 +115,7 @@ void test_comp(char8_t u8, char16_t u16, char32_t u32) {
     (void)(char32_t(0x7f) < char16_t(0x7f));
     (void)(char32_t(0x7f) < char32_t(0x7f));
 
-    (void)(char32_t(0x80) < char8_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char8_t' compares unrelated code units '<U+0080>' and '<0x80>'}}
+    (void)(char32_t(0x80) < char8_t(0x80)); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char8_t' compares unrelated code units U+0080 and <0x80>}}
     (void)(char32_t(0x80) < char16_t(0x80));
     (void)(char32_t(0x80) < char32_t(0x80));
 
@@ -124,10 +124,10 @@ void test_comp(char8_t u8, char16_t u16, char32_t u32) {
     (void)(char32_t(0x80) < char32_t(0x7f));
 
 
-    (void)(char32_t(U'🐉') <= char16_t(0xD800)); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char16_t' compares unrelated code units '🐉' and '<0xD800>'}}
+    (void)(char32_t(U'🐉') <= char16_t(0xD800)); // expected-warning{{comparing values of different Unicode code unit types 'char32_t' and 'char16_t' compares unrelated code units '🐉' U+1F409 and <0xD800>}}
     (void)(char32_t(U'🐉') <= char16_t(0xD7FF));
 
-    (void)(char16_t(0xD800) >= char32_t(U'🐉')); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char32_t' compares unrelated code units '<0xD800>' and '🐉'}}
+    (void)(char16_t(0xD800) >= char32_t(U'🐉')); // expected-warning{{comparing values of different Unicode code unit types 'char16_t' and 'char32_t' compares unrelated code units <0xD800> and '🐉' U+1F409}}
     (void)(char16_t(0x...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/206990