[clang] 2176c5e - [Clang][Sema] Fix display of characters on static assertion failure

Takuya Shimizu via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 3 22:10:53 PDT 2023


Author: Takuya Shimizu
Date: 2023-10-04T14:09:06+09:00
New Revision: 2176c5e510e3bfcbc75afb13e78d287141f239a7

URL: https://github.com/llvm/llvm-project/commit/2176c5e510e3bfcbc75afb13e78d287141f239a7
DIFF: https://github.com/llvm/llvm-project/commit/2176c5e510e3bfcbc75afb13e78d287141f239a7.diff

LOG: [Clang][Sema] Fix display of characters on static assertion failure

This patch fixes the display of characters appearing in LHS or RHS of == expression in notes to static assertion failure.
This applies C-style escape if the printed character is a special character. This also adds a numerical value displayed next to the character representation.
This also tries to print multi-byte characters if the user-provided expression is multi-byte char type.

Reviewed By: cor3ntin
Differential Revision: https://reviews.llvm.org/D155610

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/Diagnostic.h
    clang/lib/Basic/Diagnostic.cpp
    clang/lib/Sema/SemaDeclCXX.cpp
    clang/test/Lexer/cxx1z-trigraphs.cpp
    clang/test/SemaCXX/static-assert-cxx26.cpp
    clang/test/SemaCXX/static-assert.cpp

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3dce7f6a8f9d56e..4c8b85fc29755c1 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -222,6 +222,41 @@ Improvements to Clang's diagnostics
 - ``-Wfixed-enum-extension`` and ``-Wmicrosoft-fixed-enum`` diagnostics are no longer
   emitted when building as C23, since C23 standardizes support for enums with a
   fixed underlying type.
+- When describing the failure of static assertion of `==` expression, clang prints the integer
+  representation of the value as well as its character representation when
+  the user-provided expression is of character type. If the character is
+  non-printable, clang now shows the escpaed character.
+  Clang also prints multi-byte characters if the user-provided expression
+  is of multi-byte character type.
+
+  *Example Code*:
+
+  .. code-block:: c++
+
+     static_assert("A\n"[1] == U'🌍');
+
+  *BEFORE*:
+
+  .. code-block:: text
+
+    source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d''
+    1 | static_assert("A\n"[1] == U'🌍');
+      |               ^~~~~~~~~~~~~~~~~
+    source:1:24: note: expression evaluates to ''
+    ' == 127757'
+    1 | static_assert("A\n"[1] == U'🌍');
+      |               ~~~~~~~~~^~~~~~~~
+
+  *AFTER*:
+
+  .. code-block:: text
+
+    source:1:15: error: static assertion failed due to requirement '"A\n"[1] == U'\U0001f30d''
+    1 | static_assert("A\n"[1] == U'🌍');
+      |               ^~~~~~~~~~~~~~~~~
+    source:1:24: note: expression evaluates to ''\n' (0x0A, 10) == U'🌍' (0x1F30D, 127757)'
+    1 | static_assert("A\n"[1] == U'🌍');
+      |               ~~~~~~~~~^~~~~~~~
 
 Bug Fixes in This Version
 -------------------------

diff  --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h
index 5606a22fe9d68b4..3df037b793b3946 100644
--- a/clang/include/clang/Basic/Diagnostic.h
+++ b/clang/include/clang/Basic/Diagnostic.h
@@ -1840,7 +1840,7 @@ const char ToggleHighlight = 127;
 void ProcessWarningOptions(DiagnosticsEngine &Diags,
                            const DiagnosticOptions &Opts,
                            bool ReportDiags = true);
-
+void EscapeStringForDiagnostic(StringRef Str, SmallVectorImpl<char> &OutStr);
 } // namespace clang
 
 #endif // LLVM_CLANG_BASIC_DIAGNOSTIC_H

diff  --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index 7a54d27ef9d8ee2..0208ccc31bd7fc0 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -800,9 +800,10 @@ FormatDiagnostic(SmallVectorImpl<char> &OutStr) const {
   FormatDiagnostic(Diag.begin(), Diag.end(), OutStr);
 }
 
-/// pushEscapedString - Append Str to the diagnostic buffer,
+/// EscapeStringForDiagnostic - Append Str to the diagnostic buffer,
 /// escaping non-printable characters and ill-formed code unit sequences.
-static void pushEscapedString(StringRef Str, SmallVectorImpl<char> &OutStr) {
+void clang::EscapeStringForDiagnostic(StringRef Str,
+                                      SmallVectorImpl<char> &OutStr) {
   OutStr.reserve(OutStr.size() + Str.size());
   auto *Begin = reinterpret_cast<const unsigned char *>(Str.data());
   llvm::raw_svector_ostream OutStream(OutStr);
@@ -854,7 +855,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
       StringRef(DiagStr, DiagEnd - DiagStr).equals("%0") &&
       getArgKind(0) == DiagnosticsEngine::ak_std_string) {
     const std::string &S = getArgStdStr(0);
-    pushEscapedString(S, OutStr);
+    EscapeStringForDiagnostic(S, OutStr);
     return;
   }
 
@@ -961,7 +962,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
     case DiagnosticsEngine::ak_std_string: {
       const std::string &S = getArgStdStr(ArgNo);
       assert(ModifierLen == 0 && "No modifiers for strings yet");
-      pushEscapedString(S, OutStr);
+      EscapeStringForDiagnostic(S, OutStr);
       break;
     }
     case DiagnosticsEngine::ak_c_string: {
@@ -971,7 +972,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
       // Don't crash if get passed a null pointer by accident.
       if (!S)
         S = "(null)";
-      pushEscapedString(S, OutStr);
+      EscapeStringForDiagnostic(S, OutStr);
       break;
     }
     // ---- INTEGERS ----

diff  --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 7ebb38ec2ec0c49..f9c010b1a002488 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -49,6 +49,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/SaveAndRestore.h"
 #include <map>
 #include <optional>
@@ -17026,10 +17027,74 @@ Decl *Sema::ActOnStaticAssertDeclaration(SourceLocation StaticAssertLoc,
                                       AssertMessageExpr, RParenLoc, false);
 }
 
+static void WriteCharTypePrefix(BuiltinType::Kind BTK, llvm::raw_ostream &OS) {
+  switch (BTK) {
+  case BuiltinType::Char_S:
+  case BuiltinType::Char_U:
+    break;
+  case BuiltinType::Char8:
+    OS << "u8";
+    break;
+  case BuiltinType::Char16:
+    OS << 'u';
+    break;
+  case BuiltinType::Char32:
+    OS << 'U';
+    break;
+  case BuiltinType::WChar_S:
+  case BuiltinType::WChar_U:
+    OS << 'L';
+    break;
+  default:
+    llvm_unreachable("Non-character type");
+  }
+}
+
+/// Convert character's value, interpreted as a code unit, to a string.
+/// The value needs to be zero-extended to 32-bits.
+/// FIXME: This assumes Unicode literal encodings
+static void WriteCharValueForDiagnostic(uint32_t Value, const BuiltinType *BTy,
+                                        unsigned TyWidth,
+                                        SmallVectorImpl<char> &Str) {
+  char Arr[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
+  char *Ptr = Arr;
+  BuiltinType::Kind K = BTy->getKind();
+  llvm::raw_svector_ostream OS(Str);
+
+  // This should catch Char_S, Char_U, Char8, and use of escaped characters in
+  // other types.
+  if (K == BuiltinType::Char_S || K == BuiltinType::Char_U ||
+      K == BuiltinType::Char8 || Value <= 0x7F) {
+    StringRef Escaped = escapeCStyle<EscapeChar::Single>(Value);
+    if (!Escaped.empty())
+      EscapeStringForDiagnostic(Escaped, Str);
+    else
+      OS << static_cast<char>(Value);
+    return;
+  }
+
+  switch (K) {
+  case BuiltinType::Char16:
+  case BuiltinType::Char32:
+  case BuiltinType::WChar_S:
+  case BuiltinType::WChar_U: {
+    if (llvm::ConvertCodePointToUTF8(Value, Ptr))
+      EscapeStringForDiagnostic(StringRef(Arr, Ptr - Arr), Str);
+    else
+      OS << "\\x"
+         << llvm::format_hex_no_prefix(Value, TyWidth / 4, /*Upper=*/true);
+    break;
+  }
+  default:
+    llvm_unreachable("Non-character type is passed");
+  }
+}
+
 /// Convert \V to a string we can present to the user in a diagnostic
 /// \T is the type of the expression that has been evaluated into \V
 static bool ConvertAPValueToString(const APValue &V, QualType T,
-                                   SmallVectorImpl<char> &Str) {
+                                   SmallVectorImpl<char> &Str,
+                                   ASTContext &Context) {
   if (!V.hasValue())
     return false;
 
@@ -17044,13 +17109,38 @@ static bool ConvertAPValueToString(const APValue &V, QualType T,
              "Bool type, but value is not 0 or 1");
       llvm::raw_svector_ostream OS(Str);
       OS << (BoolValue ? "true" : "false");
-    } else if (T->isCharType()) {
+    } else {
+      llvm::raw_svector_ostream OS(Str);
       // Same is true for chars.
-      Str.push_back('\'');
-      Str.push_back(V.getInt().getExtValue());
-      Str.push_back('\'');
-    } else
+      // We want to print the character representation for textual types
+      const auto *BTy = T->getAs<BuiltinType>();
+      if (BTy) {
+        switch (BTy->getKind()) {
+        case BuiltinType::Char_S:
+        case BuiltinType::Char_U:
+        case BuiltinType::Char8:
+        case BuiltinType::Char16:
+        case BuiltinType::Char32:
+        case BuiltinType::WChar_S:
+        case BuiltinType::WChar_U: {
+          unsigned TyWidth = Context.getIntWidth(T);
+          assert(8 <= TyWidth && TyWidth <= 32 && "Unexpected integer width");
+          uint32_t CodeUnit = static_cast<uint32_t>(V.getInt().getZExtValue());
+          WriteCharTypePrefix(BTy->getKind(), OS);
+          OS << '\'';
+          WriteCharValueForDiagnostic(CodeUnit, BTy, TyWidth, Str);
+          OS << "' (0x"
+             << llvm::format_hex_no_prefix(CodeUnit, /*Width=*/2,
+                                           /*Upper=*/true)
+             << ", " << V.getInt() << ')';
+          return true;
+        }
+        default:
+          break;
+        }
+      }
       V.getInt().toString(Str);
+    }
 
     break;
 
@@ -17147,8 +17237,9 @@ void Sema::DiagnoseStaticAssertDetails(const Expr *E) {
 
       Side->EvaluateAsRValue(DiagSide[I].Result, Context, true);
 
-      DiagSide[I].Print = ConvertAPValueToString(
-          DiagSide[I].Result.Val, Side->getType(), DiagSide[I].ValueString);
+      DiagSide[I].Print =
+          ConvertAPValueToString(DiagSide[I].Result.Val, Side->getType(),
+                                 DiagSide[I].ValueString, Context);
     }
     if (DiagSide[0].Print && DiagSide[1].Print) {
       Diag(Op->getExprLoc(), diag::note_expr_evaluates_to)

diff  --git a/clang/test/Lexer/cxx1z-trigraphs.cpp b/clang/test/Lexer/cxx1z-trigraphs.cpp
index 2e47c05e9bd33b8..6b39ff6716605bf 100644
--- a/clang/test/Lexer/cxx1z-trigraphs.cpp
+++ b/clang/test/Lexer/cxx1z-trigraphs.cpp
@@ -21,7 +21,7 @@ error here;
 
 #if !ENABLED_TRIGRAPHS
 // expected-error at 11 {{}} expected-warning at 11 {{trigraph ignored}}
-// expected-error at 13 {{failed}} expected-warning at 13 {{trigraph ignored}} expected-note at 13 {{evaluates to ''?' == '#''}}
+// expected-error at 13 {{failed}} expected-warning at 13 {{trigraph ignored}} expected-note at 13 {{evaluates to ''?' (0x3F, 63) == '#' (0x23, 35)'}}
 // expected-error at 16 {{}}
 // expected-error at 20 {{}}
 #else

diff  --git a/clang/test/SemaCXX/static-assert-cxx26.cpp b/clang/test/SemaCXX/static-assert-cxx26.cpp
index c2bb2d2ebc6ffb2..b19aac6cabd1324 100644
--- a/clang/test/SemaCXX/static-assert-cxx26.cpp
+++ b/clang/test/SemaCXX/static-assert-cxx26.cpp
@@ -298,3 +298,12 @@ Good<Frobble> a; // expected-note {{in instantiation}}
 Bad<int> b; // expected-note {{in instantiation}}
 
 }
+
+namespace EscapeInDiagnostic {
+static_assert('\u{9}' == (char)1, ""); // expected-error {{failed}} \
+                                       // expected-note {{evaluates to ''\t' (0x09, 9) == '<U+0001>' (0x01, 1)'}}
+static_assert((char8_t)-128 == (char8_t)-123, ""); // expected-error {{failed}} \
+                                                   // expected-note {{evaluates to 'u8'<80>' (0x80, 128) == u8'<85>' (0x85, 133)'}}
+static_assert((char16_t)0xFEFF == (char16_t)0xDB93, ""); // expected-error {{failed}} \
+                                                         // expected-note {{evaluates to 'u'' (0xFEFF, 65279) == u'\xDB93' (0xDB93, 56211)'}}
+}

diff  --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp
index 5d64ff682a20e5e..4200d821339edeb 100644
--- a/clang/test/SemaCXX/static-assert.cpp
+++ b/clang/test/SemaCXX/static-assert.cpp
@@ -268,7 +268,31 @@ namespace Diagnostics {
     return 'c';
   }
   static_assert(getChar() == 'a', ""); // expected-error {{failed}} \
-                                       // expected-note {{evaluates to ''c' == 'a''}}
+                                       // expected-note {{evaluates to ''c' (0x63, 99) == 'a' (0x61, 97)'}}
+  static_assert((char)9 == '\x61', ""); // expected-error {{failed}} \
+                                        // expected-note {{evaluates to ''\t' (0x09, 9) == 'a' (0x61, 97)'}}
+  static_assert((char)10 == '\0', ""); // expected-error {{failed}} \
+                                       // expected-note {{n' (0x0A, 10) == '<U+0000>' (0x00, 0)'}}
+  // The note above is intended to match "evaluates to '\n' (0x0A, 10) == '<U+0000>' (0x00, 0)'", but if we write it as it is,
+  // the "\n" cannot be consumed by the diagnostic consumer.
+  static_assert((signed char)10 == (char)-123, ""); // expected-error {{failed}} \
+                                                    // expected-note {{evaluates to '10 == '<85>' (0x85, -123)'}}
+  static_assert((char)-4 == (unsigned char)-8, ""); // expected-error {{failed}} \
+                                                    // expected-note {{evaluates to ''<FC>' (0xFC, -4) == 248'}}
+  static_assert((char)-128 == (char)-123, ""); // expected-error {{failed}} \
+                                               // expected-note {{evaluates to ''<80>' (0x80, -128) == '<85>' (0x85, -123)'}}
+  static_assert('\xA0' == (char)'\x20', ""); // expected-error {{failed}} \
+                                             // expected-note {{evaluates to ''<A0>' (0xA0, -96) == ' ' (0x20, 32)'}}
+  static_assert((char16_t)L'ゆ' == L"C̵̭̯̠̎͌ͅť̺"[1], ""); // expected-error {{failed}} \
+                                                  // expected-note {{evaluates to 'u'ゆ' (0x3086, 12422) == L'̵' (0x335, 821)'}}
+  static_assert(L"\/"[1] == u'\xFFFD', ""); // expected-error {{failed}} \
+                                              // expected-note {{evaluates to 'L'/' (0xFF0F, 65295) == u'�' (0xFFFD, 65533)'}}
+  static_assert(L"⚾"[0] == U'🌍', ""); // expected-error {{failed}} \
+                                         // expected-note {{evaluates to 'L'⚾' (0x26BE, 9918) == U'🌍' (0x1F30D, 127757)'}}
+  static_assert(U"\a"[0] == (wchar_t)9, ""); // expected-error {{failed}} \
+                                             // expected-note {{evaluates to 'U'\a' (0x07, 7) == L'\t' (0x09, 9)'}}
+  static_assert(L"§"[0] == U'Ö', ""); // expected-error {{failed}} \
+                                      // expected-note {{evaluates to 'L'§' (0xA7, 167) == U'Ö' (0xD6, 214)'}}
 
   /// Bools are printed as bools.
   constexpr bool invert(bool b) {


        


More information about the cfe-commits mailing list