[clang] Improve diagnostics for invalid named-universal-characters (PR #206326)

Tue Jun 30 01:14:22 PDT 2026

https://github.com/eisenwave updated https://github.com/llvm/llvm-project/pull/206326

>From e2a20d710a1de9b67bcefe162ec3034249722c35 Mon Sep 17 00:00:00 2001
From: Eisenwave <me at eisenwave.net>
Date: Sun, 28 Jun 2026 14:08:36 +0200
Subject: [PATCH 1/5] [clang] Improve diagnostics for invalid
 named-universal-characters

1. Fix typo in note_invalid_ucn_name_loose_matching message.
2. Fix unprintable characters appearing in diagnostic messages.
3. Stop offering low-value fix suggestions when illegal characters appear in the name.
---
 .../include/clang/Basic/DiagnosticLexKinds.td |  6 +-
 clang/lib/Lex/LiteralSupport.cpp              | 60 ++++++++++++++++---
 clang/test/CXX/drs/cwg26xx.cpp                |  6 +-
 clang/test/Lexer/char-escapes-delimited.c     | 10 +++-
 clang/test/Lexer/unicode.c                    |  2 +-
 clang/test/Preprocessor/ucn-pp-identifier.c   |  2 +-
 6 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td
index 383bf1a7fdb3f..decfd6c781dbf 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -170,9 +170,11 @@ def err_hex_escape_no_digits : Error<
 def err_invalid_ucn_name : Error<
   "'%0' is not a valid Unicode character name">;
 def note_invalid_ucn_name_loose_matching : Note<
-  "characters names in Unicode escape sequences are sensitive to case and whitespaces">;
+  "character names in Unicode escape sequences are sensitive to case and whitespaces">;
 def note_invalid_ucn_name_candidate : Note<
-  "did you mean %0 ('%2' U+%1)?">;
+  "did you mean %0 (%1)?">;
+def note_invalid_ucn_name_character : Note<
+  "character %0 cannot appear in a Unicode character name">;
 
 def warn_ucn_escape_no_digits : Warning<
   "\\%0 used with no following hex digits; "
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 482146ccf8654..e941e5eecbe8a 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -539,6 +539,12 @@ static bool ProcessNumericUCNEscape(const char *ThisTokBegin,
   return !HasError;
 }
 
+static bool AllowedInCharacterName(llvm::UTF32 CodePoint) {
+  return (CodePoint >= U'A' && CodePoint <= U'Z') ||
+         (CodePoint >= U'0' && CodePoint <= U'9') || CodePoint == U'-' ||
+         CodePoint == U' ';
+}
+
 static void DiagnoseInvalidUnicodeCharacterName(
     DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc Loc,
     const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd,
@@ -550,6 +556,44 @@ static void DiagnoseInvalidUnicodeCharacterName(
 
   namespace u = llvm::sys::unicode;
 
+  auto StringifyCodePoint = [](llvm::UTF32 CodePoint) -> llvm::SmallString<16> {
+    llvm::SmallString<16> Result;
+    if (u::isPrintable(CodePoint)) {
+      std::string CharUTF8;
+      llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&CodePoint, 1),
+                                     CharUTF8);
+      Result.append("'");
+      Result.append(CharUTF8);
+      Result.append("' U+");
+    } else {
+      Result.append("U+");
+    }
+    llvm::raw_svector_ostream OS(Result);
+    llvm::write_hex(OS, CodePoint, llvm::HexPrintStyle::Upper, 4);
+    return Result;
+  };
+
+  bool HasIllegalCharacter = false;
+  for (const char *P = Name.begin(), *E = Name.end(); P != E;) {
+    const auto *Src = reinterpret_cast<const llvm::UTF8 *>(P);
+    const auto *SrcEnd = reinterpret_cast<const llvm::UTF8 *>(E);
+    llvm::UTF32 CodePoint = 0;
+    if (llvm::convertUTF8Sequence(&Src, SrcEnd, &CodePoint,
+                                  llvm::strictConversion) != llvm::conversionOK)
+      break;
+    if (AllowedInCharacterName(CodePoint)) {
+      P = reinterpret_cast<const char *>(Src);
+      continue;
+    }
+    SourceLocation CharLoc = Lexer::AdvanceToTokenCharacter(
+        Loc, (TokRangeBegin - TokBegin) + (P - Name.begin()), Loc.getManager(),
+        Features);
+    Diags->Report(CharLoc, diag::note_invalid_ucn_name_character)
+        << StringifyCodePoint(CodePoint);
+    HasIllegalCharacter = true;
+    break;
+  }
+
   std::optional<u::LooseMatchingResult> Res =
       u::nameToCodepointLooseMatching(Name);
   if (Res) {
@@ -562,6 +606,12 @@ static void DiagnoseInvalidUnicodeCharacterName(
     return;
   }
 
+  // Providing illegal characters suggests a fundamental misuse of the feature,
+  // like providing emoji in \N{}. Offering alternative suggestions is often
+  // unhelpful in that scenario.
+  if (HasIllegalCharacter)
+    return;
+
   unsigned Distance = 0;
   SmallVector<u::MatchForCodepointName> Matches =
       u::nearestMatchesForCodepointName(Name, 5);
@@ -576,17 +626,9 @@ static void DiagnoseInvalidUnicodeCharacterName(
       break;
     Distance = Match.Distance;
 
-    std::string Str;
-    llvm::UTF32 V = Match.Value;
-    bool Converted =
-        llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&V, 1), Str);
-    (void)Converted;
-    assert(Converted && "Found a match wich is not a unicode character");
-
     Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
          diag::note_invalid_ucn_name_candidate)
-        << Match.Name << llvm::utohexstr(Match.Value)
-        << Str // FIXME: Fix the rendering of non printable characters
+        << Match.Name << StringifyCodePoint(Match.Value)
         << FixItHint::CreateReplacement(
                MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
                                    TokRangeEnd),
diff --git a/clang/test/CXX/drs/cwg26xx.cpp b/clang/test/CXX/drs/cwg26xx.cpp
index 45653743ae574..6a15513879bbd 100644
--- a/clang/test/CXX/drs/cwg26xx.cpp
+++ b/clang/test/CXX/drs/cwg26xx.cpp
@@ -214,11 +214,7 @@ int \N{Λ} = 0;
 // expected-error at -2 {{expected unqualified-id}}
 const char* emoji = "\N{🤡}";
 // expected-error at -1 {{'🤡' is not a valid Unicode character name}}
-//   expected-note at -2 {{did you mean OX ('🐂' U+1F402)?}}
-//   expected-note at -3 {{did you mean ANT ('🐜' U+1F41C)?}}
-//   expected-note at -4 {{did you mean ARC ('⌒' U+2312)?}}
-//   expected-note at -5 {{did you mean AXE ('🪓' U+1FA93)?}}
-//   expected-note at -6 {{did you mean BAT ('🦇' U+1F987)?}}
+// expected-note at -2 {{character '🤡' U+1F921 cannot appear in a Unicode character name}}
 
 #define z(x) 0
 #define cwg2640_a z(
diff --git a/clang/test/Lexer/char-escapes-delimited.c b/clang/test/Lexer/char-escapes-delimited.c
index 7a8986bc5f867..1c20456701d75 100644
--- a/clang/test/Lexer/char-escapes-delimited.c
+++ b/clang/test/Lexer/char-escapes-delimited.c
@@ -82,7 +82,8 @@ void named(void) {
 
   char b  = '\N{DOLLAR SIGN}'; // ext-warning {{extension}} cxx23-warning {{C++23}}
   char b_ = '\N{ DOL-LAR _SIGN }'; // expected-error {{' DOL-LAR _SIGN ' is not a valid Unicode character name}} \
-                               // expected-note {{characters names in Unicode escape sequences are sensitive to case and whitespaces}}
+                               // expected-note {{character names in Unicode escape sequences are sensitive to case and whitespaces}} \
+                               // expected-note {{character '_' U+005F cannot appear in a Unicode character name}}
 
   char c = '\N{NOTATHING}'; // expected-error {{'NOTATHING' is not a valid Unicode character name}} \
                             // expected-note 5{{did you mean}}
@@ -100,9 +101,12 @@ void named(void) {
   unsigned k = u'\N{LOTUS';                       // expected-error {{incomplete universal character name}}
 
   const char* emoji = "\N{🤡}"; // expected-error {{'🤡' is not a valid Unicode character name}} \
-                                // expected-note 5{{did you mean}}
+                                // expected-note {{character '🤡' U+1F921 cannot appear in a Unicode character name}}
   const char* nested = "\N{\N{SPARKLE}}"; // expected-error {{'\N{SPARKLE' is not a valid Unicode character name}} \
-                                          // expected-note 5{{did you mean}}
+                                          // expected-note {{cannot appear in a Unicode character name}}
+  const char* line_feed = "\N{LINE FEE}"; // expected-error {{'LINE FEE' is not a valid Unicode character name}} \
+                                          // expected-note {{did you mean LINE FEED (U+000A)}} \
+                                          // expected-note 4{{did you mean}}
 }
 
 void separators(void) {
diff --git a/clang/test/Lexer/unicode.c b/clang/test/Lexer/unicode.c
index e0489e11b9da9..2d1edc77a1550 100644
--- a/clang/test/Lexer/unicode.c
+++ b/clang/test/Lexer/unicode.c
@@ -49,7 +49,7 @@ extern int \u{16D80};   // CHISOI LETTER A - Added in Unicode 18.0
 extern int a\N{TANGSA LETTER GA};
 extern int a\N{TANGSALETTERGA}; // expected-error {{'TANGSALETTERGA' is not a valid Unicode character name}} \
                                 // expected-error {{expected ';' after top level declarator}} \
-                                // expected-note {{characters names in Unicode escape sequences are sensitive to case and whitespace}}
+                                // expected-note {{character names in Unicode escape sequences are sensitive to case and whitespace}}
 
 extern int 𝛛; // expected-warning {{mathematical notation character <U+1D6DB> in an identifier is a Clang extension}}
 extern int ₉; // expected-error {{character <U+2089> not allowed at the start of an identifier}} \\
diff --git a/clang/test/Preprocessor/ucn-pp-identifier.c b/clang/test/Preprocessor/ucn-pp-identifier.c
index 5efcfe48f638a..ee008a73eb882 100644
--- a/clang/test/Preprocessor/ucn-pp-identifier.c
+++ b/clang/test/Preprocessor/ucn-pp-identifier.c
@@ -131,7 +131,7 @@ C 1
                        // expected-error {{macro name must be an identifier}}
 #define \NN            // expected-warning {{incomplete universal character name; treating as '\' followed by identifier}} expected-error {{macro name must be an identifier}}
 #define \N{GREEK_SMALL-LETTERALPHA}  // expected-error {{'GREEK_SMALL-LETTERALPHA' is not a valid Unicode character name}} \
-                                     // expected-note {{characters names in Unicode escape sequences are sensitive to case and whitespaces}}
+                                     // expected-note {{character names in Unicode escape sequences are sensitive to case and whitespaces}}
 #define \N{🤡}  // expected-error {{'🤡' is not a valid Unicode character name}} \
                 // expected-error {{macro name must be an identifier}}
 

>From e8edc90cb9edd8aaaf0e1b5c7824a144e666b1ad Mon Sep 17 00:00:00 2001
From: Eisenwave <me at eisenwave.net>
Date: Tue, 30 Jun 2026 06:28:03 +0200
Subject: [PATCH 2/5] Avoid unconditional UTF-8 decode, make
 DisplayCodePointForDiagnostic function

---
 clang/include/clang/Basic/Diagnostic.h |  2 ++
 clang/lib/Basic/Diagnostic.cpp         | 20 +++++++++++++++
 clang/lib/Lex/LiteralSupport.cpp       | 35 ++++++--------------------
 3 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h
index 826b747f2c751..c033320687078 100644
--- a/clang/include/clang/Basic/Diagnostic.h
+++ b/clang/include/clang/Basic/Diagnostic.h
@@ -28,6 +28,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ConvertUTF.h"
 #include <cassert>
 #include <cstdint>
 #include <limits>
@@ -1876,6 +1877,7 @@ void ProcessWarningOptions(DiagnosticsEngine &Diags,
                            const DiagnosticOptions &Opts,
                            llvm::vfs::FileSystem &VFS, bool ReportDiags = true);
 void EscapeStringForDiagnostic(StringRef Str, SmallVectorImpl<char> &OutStr);
+llvm::SmallString<16> DisplayCodePointForDiagnostic(llvm::UTF32 CodePoint);
 } // namespace clang
 
 #endif // LLVM_CLANG_BASIC_DIAGNOSTIC_H
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 4802478c379bb..a4d1efab6437f 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -1085,6 +1085,26 @@ void clang::EscapeStringForDiagnostic(StringRef Str,
   }
 }
 
+/// DisplayCodePointForDiagnostic - Display CodePoint in U+NNNN notation,
+/// optionally prepending the CodePoint itself if it is printable.
+llvm::SmallString<16>
+clang::DisplayCodePointForDiagnostic(llvm::UTF32 CodePoint) {
+  llvm::SmallString<16> Result;
+  if (llvm::sys::unicode::isPrintable(CodePoint)) {
+    std::string CharUTF8;
+    llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&CodePoint, 1),
+                                   CharUTF8);
+    Result.append("'");
+    Result.append(CharUTF8);
+    Result.append("' U+");
+  } else {
+    Result.append("U+");
+  }
+  llvm::raw_svector_ostream OS(Result);
+  llvm::write_hex(OS, CodePoint, llvm::HexPrintStyle::Upper, 4);
+  return Result;
+}
+
 void Diagnostic::FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
                                   SmallVectorImpl<char> &OutStr) const {
   // When the diagnostic string is only "%0", the entire string is being given
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index e941e5eecbe8a..193a23eab75c6 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -539,10 +539,8 @@ static bool ProcessNumericUCNEscape(const char *ThisTokBegin,
   return !HasError;
 }
 
-static bool AllowedInCharacterName(llvm::UTF32 CodePoint) {
-  return (CodePoint >= U'A' && CodePoint <= U'Z') ||
-         (CodePoint >= U'0' && CodePoint <= U'9') || CodePoint == U'-' ||
-         CodePoint == U' ';
+static bool AllowedInCharacterName(char C) {
+  return (C >= 'A' && C < 'Z') || (C >= '0' && C < '9') || C == '-' || C == ' ';
 }
 
 static void DiagnoseInvalidUnicodeCharacterName(
@@ -556,40 +554,23 @@ static void DiagnoseInvalidUnicodeCharacterName(
 
   namespace u = llvm::sys::unicode;
 
-  auto StringifyCodePoint = [](llvm::UTF32 CodePoint) -> llvm::SmallString<16> {
-    llvm::SmallString<16> Result;
-    if (u::isPrintable(CodePoint)) {
-      std::string CharUTF8;
-      llvm::convertUTF32ToUTF8String(llvm::ArrayRef<llvm::UTF32>(&CodePoint, 1),
-                                     CharUTF8);
-      Result.append("'");
-      Result.append(CharUTF8);
-      Result.append("' U+");
-    } else {
-      Result.append("U+");
-    }
-    llvm::raw_svector_ostream OS(Result);
-    llvm::write_hex(OS, CodePoint, llvm::HexPrintStyle::Upper, 4);
-    return Result;
-  };
-
   bool HasIllegalCharacter = false;
   for (const char *P = Name.begin(), *E = Name.end(); P != E;) {
+    if (AllowedInCharacterName(*P)) {
+      ++P;
+      continue;
+    }
     const auto *Src = reinterpret_cast<const llvm::UTF8 *>(P);
     const auto *SrcEnd = reinterpret_cast<const llvm::UTF8 *>(E);
     llvm::UTF32 CodePoint = 0;
     if (llvm::convertUTF8Sequence(&Src, SrcEnd, &CodePoint,
                                   llvm::strictConversion) != llvm::conversionOK)
       break;
-    if (AllowedInCharacterName(CodePoint)) {
-      P = reinterpret_cast<const char *>(Src);
-      continue;
-    }
     SourceLocation CharLoc = Lexer::AdvanceToTokenCharacter(
         Loc, (TokRangeBegin - TokBegin) + (P - Name.begin()), Loc.getManager(),
         Features);
     Diags->Report(CharLoc, diag::note_invalid_ucn_name_character)
-        << StringifyCodePoint(CodePoint);
+        << DisplayCodePointForDiagnostic(CodePoint);
     HasIllegalCharacter = true;
     break;
   }
@@ -628,7 +609,7 @@ static void DiagnoseInvalidUnicodeCharacterName(
 
     Diag(Diags, Features, Loc, TokBegin, TokRangeBegin, TokRangeEnd,
          diag::note_invalid_ucn_name_candidate)
-        << Match.Name << StringifyCodePoint(Match.Value)
+        << Match.Name << DisplayCodePointForDiagnostic(Match.Value)
         << FixItHint::CreateReplacement(
                MakeCharSourceRange(Features, Loc, TokBegin, TokRangeBegin,
                                    TokRangeEnd),

>From f8e310fe6c42b59b5d43aac07946fc5c509124b9 Mon Sep 17 00:00:00 2001
From: Eisenwave <me at eisenwave.net>
Date: Tue, 30 Jun 2026 07:22:26 +0200
Subject: [PATCH 3/5] Implement review suggestions, fix wrong ranges in
 allowedInCharacterName

---
 clang/lib/Basic/Diagnostic.cpp   | 4 ++--
 clang/lib/Lex/LiteralSupport.cpp | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index a4d1efab6437f..7165eb734c4df 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -1085,8 +1085,8 @@ void clang::EscapeStringForDiagnostic(StringRef Str,
   }
 }
 
-/// DisplayCodePointForDiagnostic - Display CodePoint in U+NNNN notation,
-/// optionally prepending the CodePoint itself if it is printable.
+/// Displays CodePoint in U+NNNN notation, optionally prepending the quoted
+/// CodePoint itself if printable.
 llvm::SmallString<16>
 clang::DisplayCodePointForDiagnostic(llvm::UTF32 CodePoint) {
   llvm::SmallString<16> Result;
diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp
index 193a23eab75c6..72a65dd156b19 100644
--- a/clang/lib/Lex/LiteralSupport.cpp
+++ b/clang/lib/Lex/LiteralSupport.cpp
@@ -539,8 +539,9 @@ static bool ProcessNumericUCNEscape(const char *ThisTokBegin,
   return !HasError;
 }
 
-static bool AllowedInCharacterName(char C) {
-  return (C >= 'A' && C < 'Z') || (C >= '0' && C < '9') || C == '-' || C == ' ';
+static bool allowedInCharacterName(char C) {
+  return (C >= 'A' && C <= 'Z') || (C >= '0' && C <= '9') || C == '-' ||
+         C == ' ';
 }
 
 static void DiagnoseInvalidUnicodeCharacterName(
@@ -556,7 +557,7 @@ static void DiagnoseInvalidUnicodeCharacterName(
 
   bool HasIllegalCharacter = false;
   for (const char *P = Name.begin(), *E = Name.end(); P != E;) {
-    if (AllowedInCharacterName(*P)) {
+    if (allowedInCharacterName(*P)) {
       ++P;
       continue;
     }

>From 6a082e950358d925b109445ef9fdd834dd351740 Mon Sep 17 00:00:00 2001
From: Eisenwave <me at eisenwave.net>
Date: Tue, 30 Jun 2026 10:12:30 +0200
Subject: [PATCH 4/5] Add unit tests for DisplayCodePointForDiagnostic

---
 clang/unittests/Basic/DiagnosticTest.cpp | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp
index 4ced52c8f715f..5cc0c39e5cb51 100644
--- a/clang/unittests/Basic/DiagnosticTest.cpp
+++ b/clang/unittests/Basic/DiagnosticTest.cpp
@@ -449,4 +449,24 @@ TEST_F(SuppressionMappingTest, CanonicalizesSlashesOnWindows) {
 }
 #endif
 
+TEST(DisplayCodePointForDiagnosticTest, printableDisplaysQuoted) {
+  EXPECT_EQ(DisplayCodePointForDiagnostic(U'A'), "'A' U+0041");
+  EXPECT_EQ(DisplayCodePointForDiagnostic(U'🤡'), "'🤡' U+1F921");
+  EXPECT_EQ(DisplayCodePointForDiagnostic(U' '), "' ' U+0020");
+}
+
+TEST(DisplayCodePointForDiagnosticTest, nonPrintableDisplaysNoQuoted) {
+  EXPECT_EQ(DisplayCodePointForDiagnostic(U'\n'), "U+000A");
+  EXPECT_EQ(DisplayCodePointForDiagnostic(U'\0'), "U+0000");
+  EXPECT_EQ(DisplayCodePointForDiagnostic(U'\x1B'), "U+001B");
+}
+
+TEST(DisplayCodePointForDiagnosticTest, nonScalarValues) {
+  // Low and high surrogates:
+  EXPECT_EQ(DisplayCodePointForDiagnostic(0xD800), "U+D800");
+  EXPECT_EQ(DisplayCodePointForDiagnostic(0xDFFF), "U+DFFF");
+  // Overly large values:
+  EXPECT_EQ(DisplayCodePointForDiagnostic(0x110000), "U+110000");
+}
+
 } // namespace

>From 5ba95599f857ae0c172e648dc17daa4c600d6e8f Mon Sep 17 00:00:00 2001
From: Eisenwave <me at eisenwave.net>
Date: Tue, 30 Jun 2026 10:13:59 +0200
Subject: [PATCH 5/5] Adjust comment wording

---
 clang/lib/Basic/Diagnostic.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 7165eb734c4df..57fa8a16d3a50 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -1085,8 +1085,8 @@ void clang::EscapeStringForDiagnostic(StringRef Str,
   }
 }
 
-/// Displays CodePoint in U+NNNN notation, optionally prepending the quoted
-/// CodePoint itself if printable.
+/// Displays a single Unicode codepoint in U+NNNN notation, optionally
+/// prepending the quoted codepoint itself if printable.
 llvm::SmallString<16>
 clang::DisplayCodePointForDiagnostic(llvm::UTF32 CodePoint) {
   llvm::SmallString<16> Result;